library(ArchR)
##
## / |
## / \
## . / |.
## \\\ / |.
## \\\ / `|.
## \\\ / |.
## \ / |\
## \\#####\ / ||
## ==###########> / ||
## \\##==......\ / ||
## ______ = =|__ /__ || \\\
## ,--' ,----`-,__ ___/' --,-`-===================##========>
## \ ' ##_______ _____ ,--,__,=##,__ ///
## , __== ___,-,__,--'#' ===' `-' | ##,-/
## -,____,---' \\####\\________________,--\\_##,/
## ___ .______ ______ __ __ .______
## / \ | _ \ / || | | | | _ \
## / ^ \ | |_) | | ,----'| |__| | | |_) |
## / /_\ \ | / | | | __ | | /
## / _____ \ | |\ \\___ | `----.| | | | | |\ \\___.
## /__/ \__\ | _| `._____| \______||__| |__| | _| `._____|
##
## ArchR : Version 1.0.2
## For more information see our website : www.ArchRProject.com
## If you encounter a bug please report : https://github.com/GreenleafLab/ArchR/issues
## Loading Required Packages...
## Loading Package : grid v4.2.1
## Loading Package : gridExtra v2.3
## Loading Package : gtools v3.9.4
## Loading Package : gtable v0.3.1
## Loading Package : ggplot2 v3.4.0.9000
## Loading Package : magrittr v2.0.3
## Loading Package : plyr v1.8.8
## Loading Package : stringr v1.5.0
## Loading Package : data.table v1.14.6
## Loading Package : matrixStats v0.63.0
## Loading Package : S4Vectors v0.36.1
## Warning: package 'S4Vectors' was built under R version 4.2.2
## Loading Package : GenomicRanges v1.48.0
## Loading Package : BiocGenerics v0.44.0
## Loading Package : Matrix v1.5.3
## Loading Package : Rcpp v1.0.9
## Loading Package : SummarizedExperiment v1.26.1
## Warning: multiple methods tables found for 'aperm'
## Warning: replacing previous import 'BiocGenerics::aperm' by
## 'DelayedArray::aperm' when loading 'SummarizedExperiment'
## Loading Package : rhdf5 v2.40.0
library(BSgenome.Hsapiens.UCSC.hg19)
## Loading required package: BSgenome
## Loading required package: Biostrings
## Loading required package: XVector
##
## Attaching package: 'XVector'
## The following object is masked from 'package:plyr':
##
## compact
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:grid':
##
## pattern
## The following object is masked from 'package:base':
##
## strsplit
## Loading required package: rtracklayer
addArchRThreads(threads = 8)
## Setting default number of Parallel threads to 8.
inputFiles <- getTutorialData("Hematopoiesis")
## Downloading files to HemeFragments...
inputFiles
## scATAC_BMMC_R1
## "HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz"
## scATAC_CD34_BMMC_R1
## "HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz"
## scATAC_PBMC_R1
## "HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz"
addArchRGenome("hg19")
## Setting default genome to Hg19.
ArrowFiles <- createArrowFiles(
inputFiles = inputFiles,
sampleNames = names(inputFiles),
filterTSS = 4, #Dont set this too high because you can always increase later
filterFrags = 1000,
addTileMat = TRUE,
addGeneScoreMat = TRUE
)
## filterFrags is no longer a valid input. Please use minFrags! Setting filterFrags value to minFrags!
## filterTSS is no longer a valid input. Please use minTSS! Setting filterTSS value to minTSS!
## Using GeneAnnotation set by addArchRGenome(Hg19)!
## Using GeneAnnotation set by addArchRGenome(Hg19)!
## ArchR logging to : ArchRLogs/ArchR-createArrows-246065dc654-Date-2023-01-15_Time-20-50-37.log
## If there is an issue, please report to github with logFile!
## Cleaning Temporary Files
## 2023-01-15 20:50:38 : Batch Execution w/ safelapply!, 0 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-createArrows-246065dc654-Date-2023-01-15_Time-20-50-37.log
doubScores <- addDoubletScores(
input = ArrowFiles,
k = 10, #Refers to how many cells near a "pseudo-doublet" to count.
knnMethod = "UMAP", #Refers to the embedding to use for nearest neighbor search with doublet projection.
LSIMethod = 1
)
## ArchR logging to : ArchRLogs/ArchR-addDoubletScores-246048451d94-Date-2023-01-15_Time-20-50-38.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 20:50:38 : Batch Execution w/ safelapply!, 0 mins elapsed.
## 2023-01-15 20:50:38 : scATAC_BMMC_R1 (1 of 3) : Computing Doublet Statistics, 0 mins elapsed.
## scATAC_BMMC_R1 (1 of 3) : UMAP Projection R^2 = 0.98404
## scATAC_BMMC_R1 (1 of 3) : UMAP Projection R^2 = 0.98404
## 2023-01-15 20:52:08 : scATAC_CD34_BMMC_R1 (2 of 3) : Computing Doublet Statistics, 1.498 mins elapsed.
## scATAC_CD34_BMMC_R1 (2 of 3) : UMAP Projection R^2 = 0.9927
## scATAC_CD34_BMMC_R1 (2 of 3) : UMAP Projection R^2 = 0.9927
## 2023-01-15 20:53:11 : scATAC_PBMC_R1 (3 of 3) : Computing Doublet Statistics, 2.56 mins elapsed.
## scATAC_PBMC_R1 (3 of 3) : UMAP Projection R^2 = 0.98207
## scATAC_PBMC_R1 (3 of 3) : UMAP Projection R^2 = 0.98207
## ArchR logging successful to : ArchRLogs/ArchR-addDoubletScores-246048451d94-Date-2023-01-15_Time-20-50-38.log
projHeme1 <- ArchRProject(
ArrowFiles = ArrowFiles,
outputDirectory = "HemeOutput",
copyArrows = TRUE
)
## Using GeneAnnotation set by addArchRGenome(Hg19)!
## Using GeneAnnotation set by addArchRGenome(Hg19)!
## Validating Arrows...
## Getting SampleNames...
##
## Copying ArrowFiles to Ouptut Directory! If you want to save disk space set copyArrows = FALSE
## 1 2 3
## Getting Cell Metadata...
##
## Merging Cell Metadata...
## Initializing ArchRProject...
##
## / |
## / \
## . / |.
## \\\ / |.
## \\\ / `|.
## \\\ / |.
## \ / |\
## \\#####\ / ||
## ==###########> / ||
## \\##==......\ / ||
## ______ = =|__ /__ || \\\
## ,--' ,----`-,__ ___/' --,-`-===================##========>
## \ ' ##_______ _____ ,--,__,=##,__ ///
## , __== ___,-,__,--'#' ===' `-' | ##,-/
## -,____,---' \\####\\________________,--\\_##,/
## ___ .______ ______ __ __ .______
## / \ | _ \ / || | | | | _ \
## / ^ \ | |_) | | ,----'| |__| | | |_) |
## / /_\ \ | / | | | __ | | /
## / _____ \ | |\ \\___ | `----.| | | | | |\ \\___.
## /__/ \__\ | _| `._____| \______||__| |__| | _| `._____|
##
getAvailableMatrices(projHeme1)
## [1] "GeneScoreMatrix" "TileMatrix"
head(projHeme1$cellNames)
## [1] "scATAC_BMMC_R1#TTATGTCAGTGATTAG-1" "scATAC_BMMC_R1#AAGATAGTCACCGCGA-1"
## [3] "scATAC_BMMC_R1#GCATTGAAGATTCCGT-1" "scATAC_BMMC_R1#TATGTTCAGGGTTCCC-1"
## [5] "scATAC_BMMC_R1#TCCATCGGTCCCGTGA-1" "scATAC_BMMC_R1#AGTTACGAGAACGTCG-1"
table(projHeme1$Sample)
##
## scATAC_BMMC_R1 scATAC_CD34_BMMC_R1 scATAC_PBMC_R1
## 4932 3275 2453
quantile(projHeme1$TSSEnrichment)
## 0% 25% 50% 75% 100%
## 4.10900 13.92550 16.81500 19.93025 41.98000
QC_df <- getCellColData(projHeme1, select = c("log10(nFrags)", "TSSEnrichment"))
QC_df
## DataFrame with 10660 rows and 2 columns
## log10(nFrags) TSSEnrichment
## <numeric> <numeric>
## scATAC_BMMC_R1#TTATGTCAGTGATTAG-1 4.41812 7.204
## scATAC_BMMC_R1#AAGATAGTCACCGCGA-1 4.31488 7.949
## scATAC_BMMC_R1#GCATTGAAGATTCCGT-1 4.27852 4.447
## scATAC_BMMC_R1#TATGTTCAGGGTTCCC-1 4.26236 6.941
## scATAC_BMMC_R1#TCCATCGGTCCCGTGA-1 4.24199 4.771
## ... ... ...
## scATAC_PBMC_R1#GCTGCGAAGATCCGAG-1 3.01620 24.257
## scATAC_PBMC_R1#GCAGCTGGTGGCCTTG-1 3.01578 22.537
## scATAC_PBMC_R1#GCAGATTGTACGCAAG-1 3.01410 19.888
## scATAC_PBMC_R1#TTCGTTACATTGAACC-1 3.01410 30.000
## scATAC_PBMC_R1#CGCTATCGTGAGGTCA-1 3.00087 21.287
class(QC_df)
## [1] "DFrame"
## attr(,"package")
## [1] "S4Vectors"
ggPoint(
x = QC_df[,1],
y = QC_df[,2],
colorDensity = TRUE,
continuousSet = "sambaNight",
xlabel = "Log10 Unique Fragments",
ylabel = "TSS Enrichment",
xlim = c(log10(500), quantile(QC_df[,1], probs = 0.99)),
ylim = c(0, quantile(QC_df[,2], probs = 0.99))
) + geom_hline(yintercept = 4, lty = "dashed") + geom_vline(xintercept = 3, lty = "dashed")
p1=plotGroups(
ArchRProj = projHeme1,
groupBy = "Sample",
colorBy = "cellColData",
name = "TSSEnrichment",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
## 1
p2=plotGroups(
ArchRProj = projHeme1,
groupBy = "Sample",
colorBy = "cellColData",
name = "log10(nFrags)",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
## 1
p1+p2
p1 <- plotFragmentSizes(ArchRProj = projHeme1)
## ArchR logging to : ArchRLogs/ArchR-plotFragmentSizes-246020f871fd-Date-2023-01-15_Time-20-54-16.log
## If there is an issue, please report to github with logFile!
## ArchR logging successful to : ArchRLogs/ArchR-plotFragmentSizes-246020f871fd-Date-2023-01-15_Time-20-54-16.log
p2 <- plotTSSEnrichment(ArchRProj = projHeme1)
## ArchR logging to : ArchRLogs/ArchR-plotTSSEnrichment-24601703a9f4-Date-2023-01-15_Time-20-54-31.log
## If there is an issue, please report to github with logFile!
## ArchR logging successful to : ArchRLogs/ArchR-plotTSSEnrichment-24601703a9f4-Date-2023-01-15_Time-20-54-31.log
p1+p2
#saveArchRProject(ArchRProj = projHeme1, outputDirectory = "Save-ProjHeme1", load = FALSE)
projHeme2 <- filterDoublets(projHeme1)
## Filtering 410 cells from ArchRProject!
## scATAC_BMMC_R1 : 243 of 4932 (4.9%)
## scATAC_CD34_BMMC_R1 : 107 of 3275 (3.3%)
## scATAC_PBMC_R1 : 60 of 2453 (2.4%)
projHeme2 <- addIterativeLSI(
ArchRProj = projHeme2,
useMatrix = "TileMatrix",
name = "IterativeLSI",
iterations = 2,
clusterParams = list( #See Seurat::FindClusters
resolution = c(0.2),
sampleCells = 10000,
n.start = 10
),
varFeatures = 25000,
dimsToUse = 1:30
)
## Checking Inputs...
## ArchR logging to : ArchRLogs/ArchR-addIterativeLSI-24606ea38868-Date-2023-01-15_Time-20-54-57.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 20:54:57 : Computing Total Across All Features, 0.001 mins elapsed.
## 2023-01-15 20:54:59 : Computing Top Features, 0.023 mins elapsed.
## ###########
## 2023-01-15 20:55:00 : Running LSI (1 of 2) on Top Features, 0.042 mins elapsed.
## ###########
## 2023-01-15 20:55:00 : Sampling Cells (N = 10001) for Estimated LSI, 0.043 mins elapsed.
## 2023-01-15 20:55:00 : Creating Sampled Partial Matrix, 0.043 mins elapsed.
## 2023-01-15 20:55:05 : Computing Estimated LSI (projectAll = FALSE), 0.133 mins elapsed.
## 2023-01-15 20:55:32 : Identifying Clusters, 0.585 mins elapsed.
## 2023-01-15 20:55:45 : Identified 6 Clusters, 0.788 mins elapsed.
## 2023-01-15 20:55:45 : Saving LSI Iteration, 0.788 mins elapsed.
## 2023-01-15 20:55:56 : Creating Cluster Matrix on the total Group Features, 0.986 mins elapsed.
## 2023-01-15 20:56:08 : Computing Variable Features, 1.186 mins elapsed.
## ###########
## 2023-01-15 20:56:09 : Running LSI (2 of 2) on Variable Features, 1.187 mins elapsed.
## ###########
## 2023-01-15 20:56:09 : Creating Partial Matrix, 1.188 mins elapsed.
## 2023-01-15 20:56:14 : Computing LSI, 1.276 mins elapsed.
## 2023-01-15 20:56:47 : Finished Running IterativeLSI, 1.821 mins elapsed.
projHeme2 <- addHarmony(
ArchRProj = projHeme2,
reducedDims = "IterativeLSI",
name = "Harmony",
groupBy = "Sample"
)
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony converged after 3 iterations
projHeme2 <- addClusters(
input = projHeme2,
reducedDims = "IterativeLSI",
method = "Seurat",
name = "Clusters",
resolution = 0.8
)
## ArchR logging to : ArchRLogs/ArchR-addClusters-24605c94d3dc-Date-2023-01-15_Time-20-56-54.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 20:56:54 : Running Seurats FindClusters (Stuart et al. Cell 2019), 0.001 mins elapsed.
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10250
## Number of edges: 456227
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8670
## Number of communities: 12
## Elapsed time: 0 seconds
## 2023-01-15 20:57:06 : Testing Outlier Clusters, 0.2 mins elapsed.
## 2023-01-15 20:57:06 : Assigning Cluster Names to 12 Clusters, 0.2 mins elapsed.
## 2023-01-15 20:57:06 : Finished addClusters, 0.201 mins elapsed.
table(projHeme2$Clusters)
##
## C1 C10 C11 C12 C2 C3 C4 C5 C6 C7 C8 C9
## 1454 357 307 385 1116 899 1140 1389 1242 815 726 420
projHeme2 <- addClusters(
input = projHeme2,
reducedDims = "Harmony",
method = "Seurat",
name = "HarmonyClusters",
resolution = 0.8
)
## ArchR logging to : ArchRLogs/ArchR-addClusters-246030b7de2d-Date-2023-01-15_Time-20-57-06.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 20:57:06 : Running Seurats FindClusters (Stuart et al. Cell 2019), 0.004 mins elapsed.
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10250
## Number of edges: 431341
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8587
## Number of communities: 13
## Elapsed time: 0 seconds
## 2023-01-15 20:57:18 : Testing Outlier Clusters, 0.196 mins elapsed.
## 2023-01-15 20:57:18 : Assigning Cluster Names to 13 Clusters, 0.196 mins elapsed.
## 2023-01-15 20:57:18 : Finished addClusters, 0.197 mins elapsed.
table(projHeme2$HarmonyClusters)
##
## C1 C10 C11 C12 C13 C2 C3 C4 C5 C6 C7 C8 C9
## 678 145 448 387 305 991 1242 564 2473 1061 1278 536 142
projHeme2 <- addUMAP(
ArchRProj = projHeme2,
reducedDims = "IterativeLSI",
name = "UMAP",
nNeighbors = 30,
minDist = 0.5,
metric = "cosine"
)
## 20:57:18 UMAP embedding parameters a = 0.583 b = 1.334
## 20:57:18 Read 10250 rows and found 30 numeric columns
## 20:57:18 Using Annoy for neighbor search, n_neighbors = 30
## 20:57:18 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:57:19 Writing NN index file to temp file /var/folders/2_/g44wy4692nx5kdrctq7f1vk899whll/T//RtmpaItRA1/file24602c1c5f70
## 20:57:19 Searching Annoy index using 8 threads, search_k = 3000
## 20:57:19 Annoy recall = 100%
## 20:57:20 Commencing smooth kNN distance calibration using 8 threads with target n_neighbors = 30
## 20:57:21 Initializing from normalized Laplacian + noise (using irlba)
## 20:57:21 Commencing optimization for 200 epochs, with 467090 positive edges
## 20:57:26 Optimization finished
## 20:57:26 Creating temp model dir /var/folders/2_/g44wy4692nx5kdrctq7f1vk899whll/T//RtmpaItRA1/dir246041c2981d
## 20:57:26 Creating dir /var/folders/2_/g44wy4692nx5kdrctq7f1vk899whll/T//RtmpaItRA1/dir246041c2981d
## 20:57:27 Changing to /var/folders/2_/g44wy4692nx5kdrctq7f1vk899whll/T//RtmpaItRA1/dir246041c2981d
## 20:57:27 Creating /Users/nzhang/Desktop/ArchR/HemeOutput/Embeddings/Save-Uwot-UMAP-Params-IterativeLSI-24607abdc8af-Date-2023-01-15_Time-20-57-26.tar
## Warning: invalid uid value replaced by that for user 'nobody'
## Warning: invalid gid value replaced by that for user 'nobody'
projHeme2 <- addUMAP(
ArchRProj = projHeme2,
reducedDims = "Harmony",
name = "UMAPHarmony",
nNeighbors = 30,
minDist = 0.5,
metric = "cosine"
)
## 20:57:27 UMAP embedding parameters a = 0.583 b = 1.334
## 20:57:27 Read 10250 rows and found 30 numeric columns
## 20:57:27 Using Annoy for neighbor search, n_neighbors = 30
## 20:57:27 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:57:28 Writing NN index file to temp file /var/folders/2_/g44wy4692nx5kdrctq7f1vk899whll/T//RtmpaItRA1/file24607f8c27ee
## 20:57:28 Searching Annoy index using 8 threads, search_k = 3000
## 20:57:28 Annoy recall = 100%
## 20:57:29 Commencing smooth kNN distance calibration using 8 threads with target n_neighbors = 30
## 20:57:30 Initializing from normalized Laplacian + noise (using irlba)
## 20:57:30 Commencing optimization for 200 epochs, with 468468 positive edges
## 20:57:35 Optimization finished
## 20:57:35 Creating temp model dir /var/folders/2_/g44wy4692nx5kdrctq7f1vk899whll/T//RtmpaItRA1/dir246025b8811a
## 20:57:35 Creating dir /var/folders/2_/g44wy4692nx5kdrctq7f1vk899whll/T//RtmpaItRA1/dir246025b8811a
## 20:57:36 Changing to /var/folders/2_/g44wy4692nx5kdrctq7f1vk899whll/T//RtmpaItRA1/dir246025b8811a
## 20:57:36 Creating /Users/nzhang/Desktop/ArchR/HemeOutput/Embeddings/Save-Uwot-UMAP-Params-Harmony-24604a91bbad-Date-2023-01-15_Time-20-57-35.tar
## Warning: invalid uid value replaced by that for user 'nobody'
## Warning: invalid gid value replaced by that for user 'nobody'
p1 <- plotEmbedding(ArchRProj = projHeme2, colorBy = "cellColData", name = "Sample", embedding = "UMAP")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-246060a39f73-Date-2023-01-15_Time-20-57-36.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-246060a39f73-Date-2023-01-15_Time-20-57-36.log
p2 <- plotEmbedding(ArchRProj = projHeme2, colorBy = "cellColData", name = "Clusters", embedding = "UMAP")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-246016396896-Date-2023-01-15_Time-20-57-37.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-246016396896-Date-2023-01-15_Time-20-57-37.log
p1+p2
p3 <- plotEmbedding(ArchRProj = projHeme2, colorBy = "cellColData", name = "Sample", embedding = "UMAPHarmony")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-24603e0dd07f-Date-2023-01-15_Time-20-57-39.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-24603e0dd07f-Date-2023-01-15_Time-20-57-39.log
p4 <- plotEmbedding(ArchRProj = projHeme2, colorBy = "cellColData", name = "Clusters", embedding = "UMAPHarmony")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-24607cf361ac-Date-2023-01-15_Time-20-57-39.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-24607cf361ac-Date-2023-01-15_Time-20-57-39.log
p3+p4
p5 <- plotEmbedding(ArchRProj = projHeme2, colorBy = "cellColData", name = "Sample", embedding = "UMAPHarmony")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-24603b777792-Date-2023-01-15_Time-20-57-42.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-24603b777792-Date-2023-01-15_Time-20-57-42.log
p6 <- plotEmbedding(ArchRProj = projHeme2, colorBy = "cellColData", name = "HarmonyClusters", embedding = "UMAPHarmony")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-2460204b30be-Date-2023-01-15_Time-20-57-42.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-2460204b30be-Date-2023-01-15_Time-20-57-42.log
p5+p6
# Identify marker genes
markersGS <- getMarkerFeatures(
ArchRProj = projHeme2,
useMatrix = "GeneScoreMatrix",
groupBy = "Clusters",
bias = c("TSSEnrichment", "log10(nFrags)"),
testMethod = "wilcoxon"
)
## ArchR logging to : ArchRLogs/ArchR-getMarkerFeatures-246012e7618a-Date-2023-01-15_Time-20-57-45.log
## If there is an issue, please report to github with logFile!
## MatrixClass = Sparse.Double.Matrix
## 2023-01-15 20:57:45 : Matching Known Biases, 0.001 mins elapsed.
## ###########
## 2023-01-15 20:58:52 : Completed Pairwise Tests, 1.122 mins elapsed.
## ###########
## ArchR logging successful to : ArchRLogs/ArchR-getMarkerFeatures-246012e7618a-Date-2023-01-15_Time-20-57-45.log
markerList <- getMarkers(markersGS, cutOff = "FDR <= 0.01 & Log2FC >= 1.25")
markerList$C12
## DataFrame with 288 rows and 9 columns
## seqnames start end strand name idx Log2FC
## <Rle> <array> <array> <array> <array> <array> <numeric>
## 8362 chr16 49891830 49524515 2 ZNF423 539 1.34005
## 7024 chr14 105781914 105675623 2 BRF1 763 1.44923
## 7025 chr14 105714879 105717430 1 BTBD6 764 1.89012
## 16344 chr4 5894785 5822491 2 CRMP1 82 2.74747
## 8726 chr16 89043504 88941263 2 CBFA2T3 903 1.52691
## ... ... ... ... ... ... ... ...
## 3106 chr10 123748689 124014057 1 TACC2 793 1.67167
## 14586 chr22 24032961 24041363 1 RGL4 128 1.96395
## 2053 chr1 217311097 216676588 2 ESRRG 2053 1.52471
## 11662 chr19 50004125 50004042 2 MIR150 1210 1.44071
## 7297 chr15 43882451 43825660 2 PPIP5K1 260 3.17624
## FDR MeanDiff
## <numeric> <numeric>
## 8362 1.16473e-17 1.674123
## 7024 1.88240e-17 2.029502
## 7025 7.70920e-15 2.146752
## 16344 7.70920e-15 0.936475
## 8726 2.98559e-14 1.916347
## ... ... ...
## 3106 0.00892412 0.2822542
## 14586 0.00945525 0.7592742
## 2053 0.00948083 0.3883958
## 11662 0.00975978 1.2129899
## 7297 0.00977352 0.0806601
markerGenes <- c(
"CD34", #Early Progenitor
"GATA1", #Erythroid
"PAX5", "MS4A1", "EBF1", "MME", #B-Cell Trajectory
"CD14", "CEBPB", "MPO", #Monocytes
"IRF8",
"CD3D", "CD8A", "TBX21", "IL7R" #TCells
)
heatmapGS <- markerHeatmap(
seMarker = markersGS,
cutOff = "FDR <= 0.01 & Log2FC >= 1.25",
labelMarkers = markerGenes,
transpose = TRUE
)
## Warning: 'markerHeatmap' is deprecated.
## Use 'plotMarkerHeatmap' instead.
## See help("Deprecated")
## ArchR logging to : ArchRLogs/ArchR-plotMarkerHeatmap-246035c60dd2-Date-2023-01-15_Time-20-58-53.log
## If there is an issue, please report to github with logFile!
## Printing Top Marker Genes:
## C1:
## RBP7, PADI6, CDA, LINC01226, WLS, SRGAP2D, RPTN, MNDA, FCGR2C, FCGR2B, RNASEL, FCAMR, MIR604, UBE2D1, MIR4679-2
## C2:
## PERM1, ARHGEF10L, FGR, PPIEL, MYCL, TMCO2, BTBD19, TCTEX1D4, SLC5A9, L1TD1, RPE65, GPR88, MYBPHL, GNAT2, CASQ2
## C3:
## LINC01777, AJAP1, NPPA-AS1, CLCNKB, KLHDC7A, PAX7, RNF186, UBXN10, FAM43B, ALPL, WNT4, EPHA8, MIR378F, MYOM3, TRIM63
## C4:
## ANGPTL1, DEPP1, RBP3, OPN4, SLIT1-AS1, KRTAP5-9, CCNA1, PCDH8, LINC00446, MIR3529, MPO, LAMA1, BOD1L2, PRTN3, ELANE
## C5:
## LINC02593, PRDM16, MFAP2, PADI3, HSPG2, MIR4253, CATSPER4, C1orf94, GJB5, ELAVL4, GLIS1, ACOT11, SGIP1, ST6GALNAC5, SLC6A17
## C6:
## SNORA59B, RCAN3AS, RCAN3, ORC1, S1PR1, FNDC7, AMIGO1, CHI3L2, HIST2H2BC, CIART, MIR557, GATA3-AS1, RTKN2, JAKMIP3, PDE3B
## C7:
## TNFRSF25, HSD17B7, GAS5-AS1, BNIP3, PGGHG, PTPRCAP, SNORA8, RPL13P5, MCRS1, PIP4K2C, SNORD116-3, IPW, EMC4, GCHFR, TELO2
## C8:
## TNFRSF9, RUNX3, LCK, LEXM, IL12RB2, GBP2, GBP5, LOC729930, VCAM1, CHIAP2, BCL2L15, CD160, PDZK1, CDC42SE1, SH2D2A
## C9:
## LINC01346, PRDM2, CNR2, SHISAL2A, MFSD14A, SLC16A4, PIFO, VANGL2, SLAMF6, KIAA0040, C1orf220, MIR4424, RALGPS2, RGS1, RGS13
## C10:
## CALML6, PLCH2, MAD2L2, MDS2, SLFNL1, BTF3L4, ROR1, KLHDC8A, ZNF22, ZNF488, SLC16A9, AP3M1, MIR346, NUDT9P1, PIK3AP1
## C11:
## ANKRD65, TMEM88B, PLA2G5, PLA2G2D, NCMAP, LIN28A, MAP7D1, SCMH1, TTC39A, GBP1P1, HAO2, NUDT17, RORC, CRNN, LCE5A
## C12:
## MIR551A, SLC2A7, HCRTR1, CSMD2, TFAP2E, KLF17, AGBL4, ERICH3, LOC729970, NTNG1, UCK2, MYOC, MIR5191, ESRRG, MIR5008
## Identified 2615 markers!
## [1] "CD34" "GATA1" "PAX5" "MS4A1" "EBF1" "MME" "CD14" "CEBPB" "MPO"
## [10] "IRF8" "CD3D" "CD8A" "TBX21" "IL7R"
## Adding Annotations..
## Preparing Main Heatmap..
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## ArchR logging successful to : ArchRLogs/ArchR-plotMarkerHeatmap-246035c60dd2-Date-2023-01-15_Time-20-58-53.log
ComplexHeatmap::draw(heatmapGS, heatmap_legend_side = "bot", annotation_legend_side = "bot")
markerGenes <- c(
"CD34", #Early Progenitor
"GATA1", #Erythroid
"PAX5", "MS4A1", "MME", #B-Cell Trajectory
"CD14", "MPO", #Monocytes
"CD3D", "CD8A"#TCells
)
p <- plotEmbedding(
ArchRProj = projHeme2,
colorBy = "GeneScoreMatrix",
name = markerGenes,
embedding = "UMAP",
quantCut = c(0.01, 0.95),
imputeWeights = NULL
)
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-2460d47e9e3-Date-2023-01-15_Time-20-58-56.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2023-01-15 20:58:56 :
##
## Plotting Embedding
## 1 2 3 4 5 6 7 8 9
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-2460d47e9e3-Date-2023-01-15_Time-20-58-56.log
p2 <- lapply(p, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
do.call(cowplot::plot_grid, c(list(ncol = 3),p2))
# Impute gene scores
projHeme2 <- addImputeWeights(projHeme2)
## ArchR logging to : ArchRLogs/ArchR-addImputeWeights-24606e67c394-Date-2023-01-15_Time-20-59-05.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 20:59:05 : Computing Impute Weights Using Magic (Cell 2018), 0 mins elapsed.
p <- plotEmbedding(
ArchRProj = projHeme2,
colorBy = "GeneScoreMatrix",
name = markerGenes,
embedding = "UMAP",
imputeWeights = getImputeWeights(projHeme2)
)
## Getting ImputeWeights
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-24603e3c6718-Date-2023-01-15_Time-20-59-12.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2023-01-15 20:59:13 :
##
## Imputing Matrix
## Using weights on disk
## Using weights on disk
## Plotting Embedding
## 1 2 3 4 5 6 7 8 9
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-24603e3c6718-Date-2023-01-15_Time-20-59-12.log
#Rearrange for grid plotting
p2 <- lapply(p, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
do.call(cowplot::plot_grid, c(list(ncol = 3),p2))
p <- plotBrowserTrack(
ArchRProj = projHeme2,
groupBy = "Clusters",
geneSymbol = "CD14",
upstream = 50000,
downstream = 50000
)
## ArchR logging to : ArchRLogs/ArchR-plotBrowserTrack-24607c78341f-Date-2023-01-15_Time-20-59-24.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 20:59:24 : Validating Region, 0.002 mins elapsed.
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | gene_id symbol
## <Rle> <IRanges> <Rle> | <character> <character>
## [1] chr5 140011313-140013286 - | 929 CD14
## -------
## seqinfo: 24 sequences from hg19 genome
## 2023-01-15 20:59:24 : Adding Bulk Tracks (1 of 1), 0.003 mins elapsed.
## 2023-01-15 20:59:25 : Adding Gene Tracks (1 of 1), 0.015 mins elapsed.
## 2023-01-15 20:59:25 : Plotting, 0.02 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotBrowserTrack-24607c78341f-Date-2023-01-15_Time-20-59-24.log
grid::grid.newpage()
grid::grid.draw(p$CD14)
#saveArchRProject(ArchRProj = projHeme2, outputDirectory = "Save-ProjHeme2", load = FALSE)
seRNA <- readRDS("scRNA-Hematopoiesis-Granja-2019.rds")
projHeme2 <- addGeneIntegrationMatrix(
ArchRProj = projHeme2,
useMatrix = "GeneScoreMatrix",
matrixName = "GeneIntegrationMatrix",
reducedDims = "IterativeLSI",
seRNA = seRNA,
addToArrow = FALSE,
groupRNA = "BioClassification",
nameCell = "predictedCell_Un",
nameGroup = "predictedGroup_Un",
nameScore = "predictedScore_Un"
)
## ArchR logging to : ArchRLogs/ArchR-addGeneIntegrationMatrix-2460272b8682-Date-2023-01-15_Time-20-59-30.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 20:59:30 : Running Seurat's Integration Stuart* et al 2019, 0.001 mins elapsed.
## 2023-01-15 20:59:30 : Checking ATAC Input, 0.005 mins elapsed.
## 2023-01-15 20:59:30 : Checking RNA Input, 0.008 mins elapsed.
## 2023-01-15 20:59:36 : Found 18601 overlapping gene names from gene scores and rna matrix!, 0.113 mins elapsed.
## 2023-01-15 20:59:36 : Creating Integration Blocks, 0.113 mins elapsed.
## 2023-01-15 20:59:37 : Prepping Interation Data, 0.117 mins elapsed.
## 2023-01-15 20:59:37 : Computing Integration in 1 Integration Blocks!, 0 mins elapsed.
## 2023-01-15 20:59:37 : Block (1 of 1) : Computing Integration, 0 mins elapsed.
## 2023-01-15 20:59:39 : Block (1 of 1) : Identifying Variable Genes, 0.038 mins elapsed.
## 2023-01-15 20:59:43 : Block (1 of 1) : Getting GeneScoreMatrix, 0.092 mins elapsed.
## 2023-01-15 20:59:49 : Block (1 of 1) : Imputing GeneScoreMatrix, 0.193 mins elapsed.
## Getting ImputeWeights
## 2023-01-15 21:00:19 : Block (1 of 1) : Seurat FindTransferAnchors, 0.698 mins elapsed.
## 2023-01-15 21:05:57 : Block (1 of 1) : Seurat TransferData Cell Group Labels, 6.325 mins elapsed.
## 2023-01-15 21:05:58 : Block (1 of 1) : Seurat TransferData Cell Names Labels, 6.354 mins elapsed.
## 2023-01-15 21:06:19 : Block (1 of 1) : Saving TransferAnchors Joint CCA, 6.697 mins elapsed.
## 2023-01-15 21:06:20 : Block (1 of 1) : Completed Integration, 6.718 mins elapsed.
## 2023-01-15 21:06:21 : Block (1 of 1) : Plotting Joint UMAP, 6.729 mins elapsed.
## Length of unique values greater than palette, interpolating..
## 2023-01-15 21:06:47 : Completed Integration with RNA Matrix, 7.163 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addGeneIntegrationMatrix-2460272b8682-Date-2023-01-15_Time-20-59-30.log
head(projHeme2$predictedGroup_Un)
## [1] "08_GMP.Neut" "22_CD4.M" "16_Pre.B" "06_CLP.1" "16_Pre.B"
## [6] "08_GMP.Neut"
# The predicted cell type for each scATAC-seq cluster
cM <- as.matrix(confusionMatrix(projHeme2$Clusters, projHeme2$predictedGroup_Un))
preClust <- colnames(cM)[apply(cM, 1 , which.max)]
cbind(preClust, rownames(cM)) #Assignments
## preClust
## [1,] "17_B" "C9"
## [2,] "20_CD4.N1" "C6"
## [3,] "16_Pre.B" "C10"
## [4,] "08_GMP.Neut" "C4"
## [5,] "11_CD14.Mono.1" "C1"
## [6,] "03_Late.Eryth" "C3"
## [7,] "22_CD4.M" "C7"
## [8,] "01_HSC" "C5"
## [9,] "09_pDC" "C11"
## [10,] "25_NK" "C8"
## [11,] "12_CD14.Mono.2" "C2"
## [12,] "15_CLP.2" "C12"
table(seRNA@colData$BioClassification)
##
## 01_HSC 02_Early.Eryth 03_Late.Eryth 04_Early.Baso 05_CMP.LMPP
## 1425 1653 446 111 2260
## 06_CLP.1 07_GMP 08_GMP.Neut 09_pDC 10_cDC
## 903 2097 1050 544 325
## 11_CD14.Mono.1 12_CD14.Mono.2 13_CD16.Mono 14_Unk 15_CLP.2
## 1800 4222 292 520 377
## 16_Pre.B 17_B 18_Plasma 19_CD8.N 20_CD4.N1
## 710 1711 62 1521 2470
## 21_CD4.N2 22_CD4.M 23_CD8.EM 24_CD8.CM 25_NK
## 2364 3539 796 2080 2143
## 26_Unk
## 161
#scRNA-seq 19-25 are T or NK cells
cTNK <- paste0(paste0(19:25), collapse="|")
cTNK
## [1] "19|20|21|22|23|24|25"
#scRNA-seq other cells
cNonTNK <- paste0(c(paste0("0", 1:9), 10:13, 15:18), collapse="|")
cNonTNK
## [1] "01|02|03|04|05|06|07|08|09|10|11|12|13|15|16|17|18"
#scATAC T or NK clusters
clustTNK <- rownames(cM)[grep(cTNK, preClust)]
clustTNK
## [1] "C6" "C7" "C8"
#scATAC none T or NK clusters
clustNonTNK <- rownames(cM)[grep(cNonTNK, preClust)]
clustNonTNK
## [1] "C9" "C10" "C4" "C1" "C3" "C5" "C11" "C2" "C12"
#scRNA T NK cell names
rnaTNK <- colnames(seRNA)[grep(cTNK, colData(seRNA)$BioClassification)]
head(rnaTNK)
## [1] "PBMC_10x_GREENLEAF_REP1:AAACCCAGTCGTCATA-1"
## [2] "PBMC_10x_GREENLEAF_REP1:AAACCCATCCGATGTA-1"
## [3] "PBMC_10x_GREENLEAF_REP1:AAACCCATCTCAACGA-1"
## [4] "PBMC_10x_GREENLEAF_REP1:AAACCCATCTCTCGAC-1"
## [5] "PBMC_10x_GREENLEAF_REP1:AAACGAACAATCGTCA-1"
## [6] "PBMC_10x_GREENLEAF_REP1:AAACGAACACGATTCA-1"
rnaNonTNK <- colnames(seRNA)[grep(cNonTNK, colData(seRNA)$BioClassification)]
head(rnaNonTNK)
## [1] "CD34_32_R5:AAACCTGAGTATCGAA-1" "CD34_32_R5:AAACCTGAGTCGTTTG-1"
## [3] "CD34_32_R5:AAACCTGGTTCCACAA-1" "CD34_32_R5:AAACGGGAGCTTCGCG-1"
## [5] "CD34_32_R5:AAACGGGAGGGAGTAA-1" "CD34_32_R5:AAACGGGAGTTACGGG-1"
groupList <- SimpleList(
TNK = SimpleList(
ATAC = projHeme2$cellNames[projHeme2$Clusters %in% clustTNK],
RNA = rnaTNK
),
NonTNK = SimpleList(
ATAC = projHeme2$cellNames[projHeme2$Clusters %in% clustNonTNK],
RNA = rnaNonTNK
)
)
projHeme2 <- addGeneIntegrationMatrix(
ArchRProj = projHeme2,
useMatrix = "GeneScoreMatrix",
matrixName = "GeneIntegrationMatrix",
reducedDims = "IterativeLSI",
seRNA = seRNA,
addToArrow = FALSE,
groupList = groupList,
groupRNA = "BioClassification",
nameCell = "predictedCell_Co",
nameGroup = "predictedGroup_Co",
nameScore = "predictedScore_Co"
)
## ArchR logging to : ArchRLogs/ArchR-addGeneIntegrationMatrix-24605ff5ff5c-Date-2023-01-15_Time-21-06-47.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:06:47 : Running Seurat's Integration Stuart* et al 2019, 0.001 mins elapsed.
## 2023-01-15 21:06:48 : Checking ATAC Input, 0.005 mins elapsed.
## 2023-01-15 21:06:48 : Checking RNA Input, 0.008 mins elapsed.
## 2023-01-15 21:06:54 : Found 18601 overlapping gene names from gene scores and rna matrix!, 0.105 mins elapsed.
## 2023-01-15 21:06:54 : Creating Integration Blocks, 0.105 mins elapsed.
## 2023-01-15 21:06:54 : Prepping Interation Data, 0.108 mins elapsed.
## 2023-01-15 21:06:54 : Computing Integration in 2 Integration Blocks!, 0 mins elapsed.
## 2023-01-15 21:12:15 : Block (1 of 2) : Plotting Joint UMAP, 5.339 mins elapsed.
## 2023-01-15 21:12:32 : Block (2 of 2) : Plotting Joint UMAP, 5.627 mins elapsed.
## 2023-01-15 21:12:55 : Completed Integration with RNA Matrix, 6.013 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addGeneIntegrationMatrix-24605ff5ff5c-Date-2023-01-15_Time-21-06-47.log
pal <- paletteDiscrete(values = colData(seRNA)$BioClassification)
## Length of unique values greater than palette, interpolating..
pal
## 01_HSC 02_Early.Eryth 03_Late.Eryth 04_Early.Baso 05_CMP.LMPP
## "#D51F26" "#502A59" "#235D55" "#3D6E57" "#8D2B8B"
## 06_CLP.1 07_GMP 08_GMP.Neut 09_pDC 10_cDC
## "#DE6C3E" "#F9B712" "#D8CE42" "#8E9ACD" "#B774B1"
## 11_CD14.Mono.1 12_CD14.Mono.2 13_CD16.Mono 14_Unk 15_CLP.2
## "#D69FC8" "#C7C8DE" "#8FD3D4" "#89C86E" "#CC9672"
## 16_Pre.B 17_B 18_Plasma 19_CD8.N 20_CD4.N1
## "#CF7E96" "#A27AA4" "#CD4F32" "#6B977E" "#518AA3"
## 21_CD4.N2 22_CD4.M 23_CD8.EM 24_CD8.CM 25_NK
## "#5A5297" "#0F707D" "#5E2E32" "#A95A3C" "#B28D5C"
## 26_Unk
## "#3D3D3D"
p1 <- plotEmbedding(
projHeme2,
colorBy = "cellColData",
name = "predictedGroup_Un",
pal = pal
)
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-2460699ce87c-Date-2023-01-15_Time-21-12-55.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-2460699ce87c-Date-2023-01-15_Time-21-12-55.log
p2 <- plotEmbedding(
projHeme2,
colorBy = "cellColData",
name = "predictedGroup_Co",
pal = pal
)
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-24603c635b0f-Date-2023-01-15_Time-21-12-56.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-24603c635b0f-Date-2023-01-15_Time-21-12-56.log
p1+p2
projHeme3 <- addGeneIntegrationMatrix(
ArchRProj = projHeme2,
useMatrix = "GeneScoreMatrix",
matrixName = "GeneIntegrationMatrix",
reducedDims = "IterativeLSI",
seRNA = seRNA,
addToArrow = TRUE,
force= TRUE,
groupList = groupList,
groupRNA = "BioClassification",
nameCell = "predictedCell",
nameGroup = "predictedGroup",
nameScore = "predictedScore"
)
## ArchR logging to : ArchRLogs/ArchR-addGeneIntegrationMatrix-246072a59c6b-Date-2023-01-15_Time-21-13-00.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:13:00 : Running Seurat's Integration Stuart* et al 2019, 0.001 mins elapsed.
## 2023-01-15 21:13:00 : Checking ATAC Input, 0.005 mins elapsed.
## 2023-01-15 21:13:00 : Checking RNA Input, 0.007 mins elapsed.
## 2023-01-15 21:13:07 : Found 18601 overlapping gene names from gene scores and rna matrix!, 0.122 mins elapsed.
## 2023-01-15 21:13:07 : Creating Integration Blocks, 0.122 mins elapsed.
## 2023-01-15 21:13:07 : Prepping Interation Data, 0.124 mins elapsed.
## 2023-01-15 21:13:08 : Computing Integration in 2 Integration Blocks!, 0 mins elapsed.
## 2023-01-15 21:18:41 : Block (1 of 2) : Plotting Joint UMAP, 5.56 mins elapsed.
## 2023-01-15 21:18:59 : Block (2 of 2) : Plotting Joint UMAP, 5.849 mins elapsed.
## 2023-01-15 21:19:21 : Transferring Data to ArrowFiles, 6.226 mins elapsed.
## 2023-01-15 21:20:04 : Completed Integration with RNA Matrix, 6.93 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addGeneIntegrationMatrix-246072a59c6b-Date-2023-01-15_Time-21-13-00.log
getAvailableMatrices(projHeme3)
## [1] "GeneIntegrationMatrix" "GeneScoreMatrix" "TileMatrix"
projHeme3 <- addImputeWeights(projHeme3)
## ArchR logging to : ArchRLogs/ArchR-addImputeWeights-24606ef697a5-Date-2023-01-15_Time-21-20-04.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:20:04 : Computing Impute Weights Using Magic (Cell 2018), 0 mins elapsed.
markerGenes <- c(
"CD34", #Early Progenitor
"GATA1", #Erythroid
"PAX5", "MS4A1", #B-Cell Trajectory
"CD14", #Monocytes
"CD3D", "CD8A", "TBX21", "IL7R" #TCells
)
p1 <- plotEmbedding(
ArchRProj = projHeme3,
colorBy = "GeneIntegrationMatrix",
name = markerGenes,
continuousSet = "horizonExtra",
embedding = "UMAP",
imputeWeights = getImputeWeights(projHeme3)
)
## Getting ImputeWeights
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-24607706aea7-Date-2023-01-15_Time-21-20-12.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneIntegrationMatrix
## Getting Matrix Values...
## 2023-01-15 21:20:12 :
##
## Imputing Matrix
## Using weights on disk
## Using weights on disk
## Plotting Embedding
## 1 2 3 4 5 6 7 8 9
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-24607706aea7-Date-2023-01-15_Time-21-20-12.log
p2 <- plotEmbedding(
ArchRProj = projHeme3,
colorBy = "GeneScoreMatrix",
continuousSet = "horizonExtra",
name = markerGenes,
embedding = "UMAP",
imputeWeights = getImputeWeights(projHeme3)
)
## Getting ImputeWeights
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-246057b492fd-Date-2023-01-15_Time-21-20-19.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2023-01-15 21:20:19 :
##
## Imputing Matrix
## Using weights on disk
## Using weights on disk
## Plotting Embedding
## 1 2 3 4 5 6 7 8 9
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-246057b492fd-Date-2023-01-15_Time-21-20-19.log
p1c <- lapply(p1, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
p2c <- lapply(p2, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
# Gene expression deduced by scRNA-seq
do.call(cowplot::plot_grid, c(list(ncol = 3), p1c))
#Gene score
do.call(cowplot::plot_grid, c(list(ncol = 3), p2c))
cM <- confusionMatrix(projHeme3$Clusters, projHeme3$predictedGroup)
labelOld <- rownames(cM)
labelOld
## [1] "C9" "C6" "C10" "C4" "C1" "C3" "C7" "C5" "C11" "C8" "C2" "C12"
labelNew <- colnames(cM)[apply(cM, 1, which.max)]
labelNew
## [1] "17_B" "20_CD4.N1" "16_Pre.B" "08_GMP.Neut"
## [5] "11_CD14.Mono.1" "03_Late.Eryth" "22_CD4.M" "01_HSC"
## [9] "09_pDC" "25_NK" "12_CD14.Mono.2" "15_CLP.2"
remapClust <- c(
"01_HSC" = "Progenitor",
"02_Early.Eryth" = "Erythroid",
"03_Late.Eryth" = "Erythroid",
"04_Early.Baso" = "Basophil",
"05_CMP.LMPP" = "Progenitor",
"06_CLP.1" = "CLP",
"07_GMP" = "GMP",
"08_GMP.Neut" = "GMP",
"09_pDC" = "pDC",
"10_cDC" = "cDC",
"11_CD14.Mono.1" = "Mono",
"12_CD14.Mono.2" = "Mono",
"13_CD16.Mono" = "Mono",
"15_CLP.2" = "CLP",
"16_Pre.B" = "PreB",
"17_B" = "B",
"18_Plasma" = "Plasma",
"19_CD8.N" = "CD8.N",
"20_CD4.N1" = "CD4.N",
"21_CD4.N2" = "CD4.N",
"22_CD4.M" = "CD4.M",
"23_CD8.EM" = "CD8.EM",
"24_CD8.CM" = "CD8.CM",
"25_NK" = "NK"
)
remapClust <- remapClust[names(remapClust) %in% labelNew]
labelNew2 <- mapLabels(labelNew, oldLabels = names(remapClust), newLabels = remapClust)
labelNew2
## [1] "B" "CD4.N" "PreB" "GMP" "Mono"
## [6] "Erythroid" "CD4.M" "Progenitor" "pDC" "NK"
## [11] "Mono" "CLP"
projHeme3$Clusters2 <- mapLabels(projHeme3$Clusters, newLabels = labelNew2, oldLabels = labelOld)
plotEmbedding(projHeme3, colorBy = "cellColData", name = "Clusters2")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-24605af9df2a-Date-2023-01-15_Time-21-20-34.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-24605af9df2a-Date-2023-01-15_Time-21-20-34.log
#saveArchRProject(ArchRProj = projHeme3, outputDirectory = "Save-ProjHeme3", load = FALSE)
library(BSgenome.Hsapiens.UCSC.hg19, lib.loc = "/Library/Frameworks/R.framework/Versions/4.2/Resources/library")
projHeme4 <- addGroupCoverages(ArchRProj = projHeme3, groupBy = "Clusters2")
## ArchR logging to : ArchRLogs/ArchR-addGroupCoverages-2460b825611-Date-2023-01-15_Time-21-20-35.log
## If there is an issue, please report to github with logFile!
## B (1 of 11) : CellGroups N = 2
## CD4.M (2 of 11) : CellGroups N = 2
## CD4.N (3 of 11) : CellGroups N = 2
## CLP (4 of 11) : CellGroups N = 2
## Erythroid (5 of 11) : CellGroups N = 2
## GMP (6 of 11) : CellGroups N = 2
## Mono (7 of 11) : CellGroups N = 2
## NK (8 of 11) : CellGroups N = 2
## pDC (9 of 11) : CellGroups N = 2
## PreB (10 of 11) : CellGroups N = 2
## Progenitor (11 of 11) : CellGroups N = 2
## 2023-01-15 21:20:37 : Creating Coverage Files!, 0.024 mins elapsed.
## 2023-01-15 21:20:37 : Batch Execution w/ safelapply!, 0.024 mins elapsed.
## 2023-01-15 21:22:27 : Adding Kmer Bias to Coverage Files!, 1.865 mins elapsed.
## Completed Kmer Bias Calculation
## Adding Kmer Bias (1 of 22)
## Adding Kmer Bias (2 of 22)
## Adding Kmer Bias (3 of 22)
## Adding Kmer Bias (4 of 22)
## Adding Kmer Bias (5 of 22)
## Adding Kmer Bias (6 of 22)
## Adding Kmer Bias (7 of 22)
## Adding Kmer Bias (8 of 22)
## Adding Kmer Bias (9 of 22)
## Adding Kmer Bias (10 of 22)
## Adding Kmer Bias (11 of 22)
## Adding Kmer Bias (12 of 22)
## Adding Kmer Bias (13 of 22)
## Adding Kmer Bias (14 of 22)
## Adding Kmer Bias (15 of 22)
## Adding Kmer Bias (16 of 22)
## Adding Kmer Bias (17 of 22)
## Adding Kmer Bias (18 of 22)
## Adding Kmer Bias (19 of 22)
## Adding Kmer Bias (20 of 22)
## Adding Kmer Bias (21 of 22)
## Adding Kmer Bias (22 of 22)
## 2023-01-15 21:24:20 : Finished Creation of Coverage Files!, 3.742 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addGroupCoverages-2460b825611-Date-2023-01-15_Time-21-20-35.log
projHeme4 <- addReproduciblePeakSet(
ArchRProj = projHeme4,
groupBy = "Clusters2",
pathToMacs2 = "/Users/nzhang/opt/anaconda3/envs/py37/bin/macs2"
)
## ArchR logging to : ArchRLogs/ArchR-addReproduciblePeakSet-246060ec6df7-Date-2023-01-15_Time-21-24-20.log
## If there is an issue, please report to github with logFile!
## Calling Peaks with Macs2
## 2023-01-15 21:24:21 : Peak Calling Parameters!, 0.002 mins elapsed.
## Group nCells nCellsUsed nReplicates nMin nMax maxPeaks
## B B 420 415 2 164 251 150000
## CD4.M CD4.M 815 613 2 113 500 150000
## CD4.N CD4.N 1242 549 2 49 500 150000
## CLP CLP 385 385 2 88 297 150000
## Erythroid Erythroid 899 729 2 229 500 150000
## GMP GMP 1140 840 2 340 500 150000
## Mono Mono 2570 1000 2 500 500 150000
## NK NK 726 726 2 304 422 150000
## pDC pDC 307 297 2 148 149 148500
## PreB PreB 357 357 2 40 317 150000
## Progenitor Progenitor 1389 636 2 136 500 150000
## 2023-01-15 21:24:21 : Batching Peak Calls!, 0.002 mins elapsed.
## 2023-01-15 21:24:21 : Batch Execution w/ safelapply!, 0 mins elapsed.
## 2023-01-15 21:36:14 : Identifying Reproducible Peaks!, 11.892 mins elapsed.
## 2023-01-15 21:36:24 : Creating Union Peak Set!, 12.057 mins elapsed.
## Converged after 6 iterations!
## Plotting Ggplot!
## 2023-01-15 21:36:30 : Finished Creating Union Peak Set (143229)!, 12.153 mins elapsed.
getPeakSet(projHeme4)
## GRanges object with 143229 ranges and 13 metadata columns:
## seqnames ranges strand | score
## <Rle> <IRanges> <Rle> | <numeric>
## Mono chr1 752472-752972 * | 25.48294
## B chr1 762688-763188 * | 51.64833
## B chr1 801007-801507 * | 15.77857
## B chr1 805039-805539 * | 37.51562
## CLP chr1 845326-845826 * | 2.76241
## ... ... ... ... . ...
## CD4.N chrX 154841576-154842076 * | 3.32381
## NK chrX 154842394-154842894 * | 16.09727
## Erythroid chrX 154912373-154912873 * | 7.72601
## PreB chrX 154996185-154996685 * | 3.17074
## GMP chrX 154996992-154997492 * | 3.47992
## replicateScoreQuantile groupScoreQuantile Reproducibility
## <numeric> <numeric> <numeric>
## Mono 0.858 0.723 2
## B 0.947 0.882 2
## B 0.690 0.423 2
## B 0.955 0.897 2
## CLP 0.706 0.312 2
## ... ... ... ...
## CD4.N 0.658 0.257 2
## NK 0.731 0.542 2
## Erythroid 0.394 0.116 2
## PreB 0.672 0.283 2
## GMP 0.385 0.102 2
## GroupReplicate distToGeneStart nearestGene peakType
## <character> <integer> <character> <character>
## Mono Mono._.scATAC_BMMC_R1 10179 LINC00115 Distal
## B B._.scATAC_PBMC_R1 32 LINC01128 Promoter
## B B._.scATAC_PBMC_R1 10924 FAM41C Distal
## B B._.scATAC_BMMC_R1 6892 FAM41C Intronic
## CLP CLP._.scATAC_BMMC_R1 9240 LINC02593 Distal
## ... ... ... ... ...
## CD4.N CD4.N._.scATAC_PBMC_R1 795 TMLHE Intronic
## NK NK._.scATAC_BMMC_R1 21 TMLHE Promoter
## Erythroid Erythroid._.scATAC_C.. 70000 TMLHE Distal
## PreB PreB._.Rep2 153812 TMLHE Distal
## GMP GMP._.scATAC_CD34_BM.. 154619 TMLHE Distal
## distToTSS nearestTSS GC idx N
## <integer> <character> <numeric> <integer> <numeric>
## Mono 10179 uc010nxx.2 0.4790 1 0
## B 32 uc031pjj.1 0.6966 2 0
## B 10924 uc001abt.4 0.4371 3 0
## B 6892 uc001abt.4 0.7285 4 0
## CLP 1238 uc001abu.1 0.7904 5 0
## ... ... ... ... ... ...
## CD4.N 795 uc004fnn.3 0.4172 3445 0
## NK 21 uc004fnn.3 0.5988 3446 0
## Erythroid 70000 uc004fnn.3 0.3653 3447 0
## PreB 1015 uc004fnq.1 0.4251 3448 0
## GMP 208 uc004fnq.1 0.5449 3449 0
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
#saveArchRProject(ArchRProj = projHeme4, outputDirectory = "Save-ProjHeme4", load = FALSE)
projHeme5 <- addPeakMatrix(projHeme4)
## ArchR logging to : ArchRLogs/ArchR-addPeakMatrix-24601695a850-Date-2023-01-15_Time-21-36-30.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:36:30 : Batch Execution w/ safelapply!, 0 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addPeakMatrix-24601695a850-Date-2023-01-15_Time-21-36-30.log
getAvailableMatrices(projHeme5)
## [1] "GeneIntegrationMatrix" "GeneScoreMatrix" "PeakMatrix"
## [4] "TileMatrix"
markersPeaks <- getMarkerFeatures(
ArchRProj = projHeme5,
useMatrix = "PeakMatrix",
groupBy = "Clusters2",
bias = c("TSSEnrichment", "log10(nFrags)"),
testMethod = "wilcoxon"
)
## ArchR logging to : ArchRLogs/ArchR-getMarkerFeatures-246014321282-Date-2023-01-15_Time-21-37-20.log
## If there is an issue, please report to github with logFile!
## MatrixClass = Sparse.Integer.Matrix
## 2023-01-15 21:37:20 : Matching Known Biases, 0.003 mins elapsed.
## ###########
## 2023-01-15 21:38:30 : Completed Pairwise Tests, 1.175 mins elapsed.
## ###########
## ArchR logging successful to : ArchRLogs/ArchR-getMarkerFeatures-246014321282-Date-2023-01-15_Time-21-37-20.log
markersPeaks
## class: SummarizedExperiment
## dim: 143229 11
## metadata(2): MatchInfo Params
## assays(7): Log2FC Mean ... AUC MeanBGD
## rownames(143229): 1 2 ... 143228 143229
## rowData names(4): seqnames idx start end
## colnames(11): B CD4.M ... PreB Progenitor
## colData names(0):
markerList <- getMarkers(markersPeaks, cutOff = "FDR <= 0.01 & Log2FC >= 1", returnGR = TRUE)
markerList
## List of length 11
## names(11): B CD4.M CD4.N CLP Erythroid GMP Mono NK pDC PreB Progenitor
markerList$B
## GRanges object with 465 ranges and 3 metadata columns:
## seqnames ranges strand | Log2FC FDR MeanDiff
## <Rle> <IRanges> <Rle> | <numeric> <numeric> <numeric>
## [1] chr16 88079477-88079977 * | 4.25447 1.57704e-09 0.829693
## [2] chr3 13152065-13152565 * | 3.29373 1.57704e-09 1.316865
## [3] chr2 232537187-232537687 * | 3.42329 1.88190e-08 0.998006
## [4] chr10 49879507-49880007 * | 6.01904 2.87913e-08 0.811835
## [5] chr11 60222870-60223370 * | 4.70305 4.07094e-08 0.860517
## ... ... ... ... . ... ... ...
## [461] chr3 169531535-169532035 * | 2.77078 0.00963295 0.424682
## [462] chr8 125649626-125650126 * | 1.42537 0.00963295 0.320802
## [463] chr20 43214896-43215396 * | 2.34248 0.00974009 0.471158
## [464] chr6 62995933-62996433 * | 2.01060 0.00993924 0.359606
## [465] chr9 79087060-79087560 * | 2.46851 0.00997262 0.292655
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
heatmapPeaks <- markerHeatmap(
seMarker = markersPeaks,
cutOff = "FDR <= 0.1 & Log2FC >= 0.5",
transpose = TRUE
)
## Warning: 'markerHeatmap' is deprecated.
## Use 'plotMarkerHeatmap' instead.
## See help("Deprecated")
## ArchR logging to : ArchRLogs/ArchR-plotMarkerHeatmap-24602a4cc5f8-Date-2023-01-15_Time-21-38-33.log
## If there is an issue, please report to github with logFile!
## Identified 41053 markers!
## [1] "chr1:801007-801507" "chr1:968348-968848" "chr1:1071786-1072286"
## [4] "chr1:4067611-4068111" "chr1:4087539-4088039" "chr1:4104872-4105372"
## [7] "chr1:12212042-12212542" "chr1:15208404-15208904" "chr1:15898189-15898689"
## [10] "chr1:24054520-24055020" "chr1:24239629-24240129" "chr1:25075151-25075651"
## [13] "chr1:27070474-27070974" "chr1:27854640-27855140" "chr1:28970219-28970719"
## [16] "chr1:1003836-1004336" "chr1:1004512-1005012" "chr1:1141925-1142425"
## [19] "chr1:1152339-1152839" "chr1:1259847-1260347" "chr1:1306701-1307201"
## [22] "chr1:1307337-1307837" "chr1:1624353-1624853" "chr1:1709415-1709915"
## [25] "chr1:2125943-2126443" "chr1:2163278-2163778" "chr1:2186319-2186819"
## [28] "chr1:2246488-2246988" "chr1:2455275-2455775" "chr1:2478144-2478644"
## [31] "chr1:2084331-2084831" "chr1:2474849-2475349" "chr1:8761186-8761686"
## [34] "chr1:20960228-20960728" "chr1:22350842-22351342" "chr1:23810953-23811453"
## [37] "chr1:24194675-24195175" "chr1:24741918-24742418" "chr1:24862906-24863406"
## [40] "chr1:26644134-26644634" "chr1:27848816-27849316" "chr1:31222573-31223073"
## [43] "chr1:36772351-36772851" "chr1:39649342-39649842" "chr1:40156880-40157380"
## [46] "chr1:1114597-1115097" "chr1:1144252-1144752" "chr1:1609169-1609669"
## [49] "chr1:1765345-1765845" "chr1:1838613-1839113" "chr1:5777263-5777763"
## [52] "chr1:6640958-6641458" "chr1:9777631-9778131" "chr1:11412790-11413290"
## [55] "chr1:11435901-11436401" "chr1:11467303-11467803" "chr1:13825006-13825506"
## [58] "chr1:17336172-17336672" "chr1:19719613-19720113" "chr1:20539344-20539844"
## [61] "chr1:940282-940782" "chr1:2986471-2986971" "chr1:3244594-3245094"
## [64] "chr1:3309993-3310493" "chr1:3458489-3458989" "chr1:3691306-3691806"
## [67] "chr1:3797600-3798100" "chr1:4388438-4388938" "chr1:5569866-5570366"
## [70] "chr1:6489662-6490162" "chr1:6732543-6733043" "chr1:7296085-7296585"
## [73] "chr1:7609307-7609807" "chr1:7632733-7633233" "chr1:8576830-8577330"
## [76] "chr1:3526969-3527469" "chr1:4698287-4698787" "chr1:5831046-5831546"
## [79] "chr1:6230818-6231318" "chr1:7173691-7174191" "chr1:8998227-8998727"
## [82] "chr1:9256128-9256628" "chr1:11753213-11753713" "chr1:16212724-16213224"
## [85] "chr1:17517855-17518355" "chr1:22232666-22233166" "chr1:25198935-25199435"
## [88] "chr1:27288283-27288783" "chr1:36674838-36675338" "chr1:39826343-39826843"
## [91] "chr1:890838-891338" "chr1:1242382-1242882" "chr1:1310497-1310997"
## [94] "chr1:1695647-1696147" "chr1:1711624-1712124" "chr1:1821570-1822070"
## [97] "chr1:2074239-2074739" "chr1:2080069-2080569" "chr1:2082652-2083152"
## [100] "chr1:2165768-2166268" "chr1:2177462-2177962" "chr1:2178055-2178555"
## [103] "chr1:2186949-2187449" "chr1:3594022-3594522" "chr1:3817692-3818192"
## [106] "chr1:976000-976500" "chr1:1148113-1148613" "chr1:1157229-1157729"
## [109] "chr1:1243747-1244247" "chr1:1564723-1565223" "chr1:1565451-1565951"
## [112] "chr1:1566572-1567072" "chr1:1567413-1567913" "chr1:1623715-1624215"
## [115] "chr1:1840413-1840913" "chr1:2161231-2161731" "chr1:2171009-2171509"
## [118] "chr1:2187486-2187986" "chr1:2236638-2237138" "chr1:2477559-2478059"
## [121] "chr1:2225944-2226444" "chr1:2886250-2886750" "chr1:6084410-6084910"
## [124] "chr1:7022468-7022968" "chr1:8030210-8030710" "chr1:8537239-8537739"
## [127] "chr1:8889516-8890016" "chr1:10313802-10314302" "chr1:13820457-13820957"
## [130] "chr1:16023323-16023823" "chr1:16416444-16416944" "chr1:17567375-17567875"
## [133] "chr1:19732624-19733124" "chr1:20404699-20405199" "chr1:21651559-21652059"
## [136] "chr1:1774207-1774707" "chr1:1833387-1833887" "chr1:6647377-6647877"
## [139] "chr1:9473434-9473934" "chr1:11751396-11751896" "chr1:23855064-23855564"
## [142] "chr1:24055079-24055579" "chr1:24517429-24517929" "chr1:26098310-26098810"
## [145] "chr1:26867767-26868267" "chr1:26869514-26870014" "chr1:27032626-27033126"
## [148] "chr1:27853533-27854033" "chr1:27928850-27929350" "chr1:31949777-31950277"
## [151] "chr1:845326-845826" "chr1:856373-856873" "chr1:999469-999969"
## [154] "chr1:1079369-1079869" "chr1:1369562-1370062" "chr1:1831012-1831512"
## [157] "chr1:1935026-1935526" "chr1:1977839-1978339" "chr1:2058349-2058849"
## [160] "chr1:2210550-2211050" "chr1:2221933-2222433" "chr1:2705485-2705985"
## [163] "chr1:2762283-2762783" "chr1:2804998-2805498" "chr1:2829153-2829653"
## Adding Annotations..
## Preparing Main Heatmap..
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## ArchR logging successful to : ArchRLogs/ArchR-plotMarkerHeatmap-24602a4cc5f8-Date-2023-01-15_Time-21-38-33.log
draw(heatmapPeaks, heatmap_legend_side = "bot", annotation_legend_side = "bot")
pv <- markerPlot(seMarker = markersPeaks, name = "Erythroid", cutOff = "FDR <= 0.1 & Log2FC >= 1", plotAs = "Volcano")
## Warning: 'markerPlot' is deprecated.
## Use 'plotMarkers' instead.
## See help("Deprecated")
pv
## Warning: Removed 58 rows containing missing values (`geom_point_rast()`).
p <- plotBrowserTrack(
ArchRProj = projHeme5,
groupBy = "Clusters2",
geneSymbol = c("GATA1"),
features = getMarkers(markersPeaks, cutOff = "FDR <= 0.1 & Log2FC >= 1", returnGR = TRUE)["Erythroid"],
upstream = 50000,
downstream = 50000
)
## ArchR logging to : ArchRLogs/ArchR-plotBrowserTrack-246037820724-Date-2023-01-15_Time-21-38-53.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:38:53 : Validating Region, 0.002 mins elapsed.
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | gene_id symbol
## <Rle> <IRanges> <Rle> | <character> <character>
## [1] chrX 48644982-48652717 + | 2623 GATA1
## -------
## seqinfo: 24 sequences from hg19 genome
## 2023-01-15 21:38:53 : Adding Bulk Tracks (1 of 1), 0.002 mins elapsed.
## 2023-01-15 21:38:54 : Adding Feature Tracks (1 of 1), 0.014 mins elapsed.
## 2023-01-15 21:38:54 : Adding Gene Tracks (1 of 1), 0.015 mins elapsed.
## 2023-01-15 21:38:54 : Plotting, 0.018 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotBrowserTrack-246037820724-Date-2023-01-15_Time-21-38-53.log
grid::grid.newpage()
grid::grid.draw(p$GATA1)
markerTest <- getMarkerFeatures(
ArchRProj = projHeme5,
useMatrix = "PeakMatrix",
groupBy = "Clusters2",
testMethod = "wilcoxon",
bias = c("TSSEnrichment", "log10(nFrags)"),
useGroups = "Erythroid",
bgdGroups = "Progenitor"
)
## ArchR logging to : ArchRLogs/ArchR-getMarkerFeatures-24602eb169ba-Date-2023-01-15_Time-21-38-55.log
## If there is an issue, please report to github with logFile!
## MatrixClass = Sparse.Integer.Matrix
## 2023-01-15 21:38:55 : Matching Known Biases, 0.003 mins elapsed.
## 2023-01-15 21:38:56 : Computing Pairwise Tests (1 of 1), 0.018 mins elapsed.
## Pairwise Test Erythroid : Seqnames chr1
## Pairwise Test Erythroid : Seqnames chr10
## Pairwise Test Erythroid : Seqnames chr11
## Pairwise Test Erythroid : Seqnames chr12
## Pairwise Test Erythroid : Seqnames chr13
## Pairwise Test Erythroid : Seqnames chr14
## Pairwise Test Erythroid : Seqnames chr15
## Pairwise Test Erythroid : Seqnames chr16
## Pairwise Test Erythroid : Seqnames chr17
## Pairwise Test Erythroid : Seqnames chr18
## Pairwise Test Erythroid : Seqnames chr19
## Pairwise Test Erythroid : Seqnames chr2
## Pairwise Test Erythroid : Seqnames chr20
## Pairwise Test Erythroid : Seqnames chr21
## Pairwise Test Erythroid : Seqnames chr22
## Pairwise Test Erythroid : Seqnames chr3
## Pairwise Test Erythroid : Seqnames chr4
## Pairwise Test Erythroid : Seqnames chr5
## Pairwise Test Erythroid : Seqnames chr6
## Pairwise Test Erythroid : Seqnames chr7
## Pairwise Test Erythroid : Seqnames chr8
## Pairwise Test Erythroid : Seqnames chr9
## Pairwise Test Erythroid : Seqnames chrX
## ###########
## 2023-01-15 21:39:17 : Completed Pairwise Tests, 0.355 mins elapsed.
## ###########
## ArchR logging successful to : ArchRLogs/ArchR-getMarkerFeatures-24602eb169ba-Date-2023-01-15_Time-21-38-55.log
markerPlot(seMarker = markerTest, name = "Erythroid", cutOff = "FDR <= 0.1 & abs(Log2FC) >= 1", plotAs = "Volcano")
## Warning: 'markerPlot' is deprecated.
## Use 'plotMarkers' instead.
## See help("Deprecated")
## Warning: Removed 55 rows containing missing values (`geom_point_rast()`).
#saveArchRProject(ArchRProj = projHeme5, outputDirectory = "Save-ProjHeme5", load = FALSE)
projHeme5 <- addMotifAnnotations(ArchRProj = projHeme5, motifSet = "cisbp", name = "Motif")
## ArchR logging to : ArchRLogs/ArchR-addMotifAnnotations-246073fc7fbb-Date-2023-01-15_Time-21-39-25.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:39:27 : Gettting Motif Set, Species : Homo sapiens, 0.002 mins elapsed.
## Using version 2 motifs!
## 2023-01-15 21:39:28 : Finding Motif Positions with motifmatchr!, 0.029 mins elapsed.
## 2023-01-15 21:41:42 : All Motifs Overlap at least 1 peak!, 2.26 mins elapsed.
## 2023-01-15 21:41:42 : Creating Motif Overlap Matrix, 2.26 mins elapsed.
## 2023-01-15 21:41:45 : Finished Getting Motif Info!, 2.302 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addMotifAnnotations-246073fc7fbb-Date-2023-01-15_Time-21-39-25.log
motifsUp <- peakAnnoEnrichment(
seMarker = markerTest,
ArchRProj = projHeme5,
peakAnnotation = "Motif",
cutOff = "FDR <= 0.1 & Log2FC >= 0.5"
)
## ArchR logging to : ArchRLogs/ArchR-peakAnnoEnrichment-24606b384c90-Date-2023-01-15_Time-21-41-46.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:41:49 : Computing Enrichments 1 of 1, 0.057 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-peakAnnoEnrichment-24606b384c90-Date-2023-01-15_Time-21-41-46.log
motifsUp
## class: SummarizedExperiment
## dim: 870 1
## metadata(0):
## assays(10): mlog10Padj mlog10p ... CompareFrequency feature
## rownames(870): TFAP2B_1 TFAP2D_2 ... TBX18_869 TBX22_870
## rowData names(0):
## colnames(1): Erythroid
## colData names(0):
df <- data.frame(TF = rownames(motifsUp), mlog10Padj = assay(motifsUp)[,1])
df <- df[order(df$mlog10Padj, decreasing = TRUE),]
df$rank <- seq_len(nrow(df))
head(df)
## TF mlog10Padj rank
## 383 GATA1_383 507.2283 1
## 388 GATA2_388 497.4459 2
## 384 GATA3_384 422.9416 3
## 385 GATA5_385 409.9321 4
## 386 GATA4_386 315.7656 5
## 387 GATA6_387 214.8645 6
ggplot(df, aes(rank, mlog10Padj, color = mlog10Padj)) +
geom_point(size = 1) +
ggrepel::geom_label_repel(
data = df[rev(seq_len(30)), ], aes(x = rank, y = mlog10Padj, label = TF),
size = 1.5,
nudge_x = 2,
color = "black"
) + theme_ArchR() +
ylab("-log10(P-adj) Motif Enrichment") +
xlab("Rank Sorted TFs Enriched") +
scale_color_gradientn(colors = paletteContinuous(set = "comet"))
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
enrichMotifs <- peakAnnoEnrichment(
seMarker = markersPeaks,
ArchRProj = projHeme5,
peakAnnotation = "Motif",
cutOff = "FDR <= 0.1 & Log2FC >= 0.5"
)
## ArchR logging to : ArchRLogs/ArchR-peakAnnoEnrichment-246058087f26-Date-2023-01-15_Time-21-41-50.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:41:54 : Computing Enrichments 1 of 11, 0.056 mins elapsed.
## 2023-01-15 21:41:54 : Computing Enrichments 2 of 11, 0.062 mins elapsed.
## 2023-01-15 21:41:54 : Computing Enrichments 3 of 11, 0.065 mins elapsed.
## 2023-01-15 21:41:55 : Computing Enrichments 4 of 11, 0.07 mins elapsed.
## 2023-01-15 21:41:55 : Computing Enrichments 5 of 11, 0.075 mins elapsed.
## 2023-01-15 21:41:55 : Computing Enrichments 6 of 11, 0.078 mins elapsed.
## 2023-01-15 21:41:55 : Computing Enrichments 7 of 11, 0.083 mins elapsed.
## 2023-01-15 21:41:56 : Computing Enrichments 8 of 11, 0.087 mins elapsed.
## 2023-01-15 21:41:56 : Computing Enrichments 9 of 11, 0.092 mins elapsed.
## 2023-01-15 21:41:56 : Computing Enrichments 10 of 11, 0.095 mins elapsed.
## 2023-01-15 21:41:56 : Computing Enrichments 11 of 11, 0.1 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-peakAnnoEnrichment-246058087f26-Date-2023-01-15_Time-21-41-50.log
heatmapEM <- plotEnrichHeatmap(enrichMotifs, n = 7, transpose = TRUE)
## ArchR logging to : ArchRLogs/ArchR-plotEnrichHeatmap-2460226b569-Date-2023-01-15_Time-21-41-57.log
## If there is an issue, please report to github with logFile!
## Adding Annotations..
## Preparing Main Heatmap..
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
ComplexHeatmap::draw(heatmapEM, heatmap_legend_side = "bot", annotation_legend_side = "bot")
## The overlap with bulk ATAC-seq peak set
projHeme5 <- addArchRAnnotations(ArchRProj = projHeme5, collection = "ATAC")
## ArchR logging to : ArchRLogs/ArchR-addArchRAnnotations-24606461704e-Date-2023-01-15_Time-21-41-58.log
## If there is an issue, please report to github with logFile!
## Annotating Chromosomes
## 2023-01-15 21:41:58 :
## Annotating Chr: chr1
## 2023-01-15 21:41:58 :
## Annotating Chr: chr2
## 2023-01-15 21:41:58 :
## Annotating Chr: chr3
## 2023-01-15 21:41:58 :
## Annotating Chr: chr4
## 2023-01-15 21:41:58 :
## Annotating Chr: chr5
## 2023-01-15 21:41:59 :
## Annotating Chr: chr6
## 2023-01-15 21:41:59 :
## Annotating Chr: chr7
## 2023-01-15 21:41:59 :
## Annotating Chr: chr8
## 2023-01-15 21:41:59 :
## Annotating Chr: chr9
## 2023-01-15 21:41:59 :
## Annotating Chr: chr10
## 2023-01-15 21:41:59 :
## Annotating Chr: chr11
## 2023-01-15 21:41:59 :
## Annotating Chr: chr12
## 2023-01-15 21:41:59 :
## Annotating Chr: chr13
## 2023-01-15 21:41:59 :
## Annotating Chr: chr14
## 2023-01-15 21:41:59 :
## Annotating Chr: chr15
## 2023-01-15 21:42:00 :
## Annotating Chr: chr16
## 2023-01-15 21:42:00 :
## Annotating Chr: chr17
## 2023-01-15 21:42:00 :
## Annotating Chr: chr18
## 2023-01-15 21:42:00 :
## Annotating Chr: chr19
## 2023-01-15 21:42:00 :
## Annotating Chr: chr20
## 2023-01-15 21:42:00 :
## Annotating Chr: chr21
## 2023-01-15 21:42:00 :
## Annotating Chr: chr22
## 2023-01-15 21:42:00 :
## Annotating Chr: chrX
## 2023-01-15 21:42:00 :
## 2023-01-15 21:42:01 : All Regions Overlap at least 1 peak!, 0.052 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addArchRAnnotations-24606461704e-Date-2023-01-15_Time-21-41-58.log
enrichATAC <- peakAnnoEnrichment(
seMarker = markersPeaks,
ArchRProj = projHeme5,
peakAnnotation = "ATAC",
cutOff = "FDR <= 0.1 & Log2FC >= 0.5"
)
## ArchR logging to : ArchRLogs/ArchR-peakAnnoEnrichment-24603914445e-Date-2023-01-15_Time-21-42-01.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:42:05 : Computing Enrichments 1 of 11, 0.051 mins elapsed.
## 2023-01-15 21:42:05 : Computing Enrichments 2 of 11, 0.054 mins elapsed.
## 2023-01-15 21:42:05 : Computing Enrichments 3 of 11, 0.055 mins elapsed.
## 2023-01-15 21:42:05 : Computing Enrichments 4 of 11, 0.057 mins elapsed.
## 2023-01-15 21:42:05 : Computing Enrichments 5 of 11, 0.059 mins elapsed.
## 2023-01-15 21:42:05 : Computing Enrichments 6 of 11, 0.061 mins elapsed.
## 2023-01-15 21:42:05 : Computing Enrichments 7 of 11, 0.063 mins elapsed.
## 2023-01-15 21:42:05 : Computing Enrichments 8 of 11, 0.064 mins elapsed.
## 2023-01-15 21:42:06 : Computing Enrichments 9 of 11, 0.067 mins elapsed.
## 2023-01-15 21:42:06 : Computing Enrichments 10 of 11, 0.069 mins elapsed.
## 2023-01-15 21:42:06 : Computing Enrichments 11 of 11, 0.07 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-peakAnnoEnrichment-24603914445e-Date-2023-01-15_Time-21-42-01.log
heatmapATAC <- plotEnrichHeatmap(enrichATAC, n = 7, transpose = TRUE)
## ArchR logging to : ArchRLogs/ArchR-plotEnrichHeatmap-246061a7b934-Date-2023-01-15_Time-21-42-06.log
## If there is an issue, please report to github with logFile!
## Adding Annotations..
## Preparing Main Heatmap..
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
ComplexHeatmap::draw(heatmapATAC, heatmap_legend_side = "bot", annotation_legend_side = "bot")
if("Motif" %ni% names(projHeme5@peakAnnotation)){
projHeme5 <- addMotifAnnotations(ArchRProj = projHeme5, motifSet = "cisbp", name = "Motif")
}
projHeme5 <- addBgdPeaks(projHeme5)
## Identifying Background Peaks!
# This function save the result into the "MotifMatrix" matrix which can be used later.
projHeme5 <- addDeviationsMatrix(
ArchRProj = projHeme5,
peakAnnotation = "Motif",
force = TRUE
)
## Using Previous Background Peaks!
## ArchR logging to : ArchRLogs/ArchR-addDeviationsMatrix-2460738b4101-Date-2023-01-15_Time-21-42-14.log
## If there is an issue, please report to github with logFile!
## NULL
## as(<lgCMatrix>, "dgCMatrix") is deprecated since Matrix 1.5-0; do as(., "dMatrix") instead
## 2023-01-15 21:42:17 : Batch Execution w/ safelapply!, 0 mins elapsed.
## ###########
## 2023-01-15 21:49:41 : Completed Computing Deviations!, 7.454 mins elapsed.
## ###########
## ArchR logging successful to : ArchRLogs/ArchR-addDeviationsMatrix-2460738b4101-Date-2023-01-15_Time-21-42-14.log
plotVarDev <- getVarDeviations(projHeme5, name = "MotifMatrix", plot = TRUE)
## DataFrame with 6 rows and 6 columns
## seqnames idx name combinedVars combinedMeans rank
## <Rle> <array> <array> <numeric> <numeric> <integer>
## f388 z 388 GATA2_388 11.8166 -0.0346497 1
## f155 z 155 CEBPA_155 11.5241 -0.1882559 2
## f336 z 336 SPIB_336 11.5169 -0.0824527 3
## f383 z 383 GATA1_383 11.5130 -0.0435856 4
## f651 z 651 SMARCC1_651 10.4661 -0.1335942 5
## f385 z 385 GATA5_385 10.3523 -0.0341025 6
plotVarDev
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Check the variation of target TF motifs across all cells. ### Get the
motif names corresponding to each TF
motifs <- c("GATA1", "CEBPA", "EBF1", "IRF4", "TBX21", "PAX5")
markerMotifs <- getFeatures(projHeme5, select = paste(motifs, collapse="|"), useMatrix = "MotifMatrix")
markerMotifs
## [1] "z:TBX21_780" "z:PAX5_709" "z:IRF4_632"
## [4] "z:GATA1_383" "z:CEBPA_155" "z:EBF1_67"
## [7] "z:SREBF1_22" "deviations:TBX21_780" "deviations:PAX5_709"
## [10] "deviations:IRF4_632" "deviations:GATA1_383" "deviations:CEBPA_155"
## [13] "deviations:EBF1_67" "deviations:SREBF1_22"
markerMotifs <- grep("z:", markerMotifs, value = TRUE)
markerMotifs <- markerMotifs[markerMotifs %ni% "z:SREBF1_22"]
markerMotifs
## [1] "z:TBX21_780" "z:PAX5_709" "z:IRF4_632" "z:GATA1_383" "z:CEBPA_155"
## [6] "z:EBF1_67"
projHeme5 <- addImputeWeights(projHeme5)
## ArchR logging to : ArchRLogs/ArchR-addImputeWeights-246015784905-Date-2023-01-15_Time-21-49-44.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:49:44 : Computing Impute Weights Using Magic (Cell 2018), 0 mins elapsed.
p <- plotGroups(ArchRProj = projHeme5,
groupBy = "Clusters2",
colorBy = "MotifMatrix",
name = markerMotifs,
imputeWeights = getImputeWeights(projHeme5)
)
## Getting ImputeWeights
## Getting Matrix Values...
## 2023-01-15 21:49:52 :
##
## ArchR logging to : ArchRLogs/ArchR-imputeMatrix-246053c8d7d2-Date-2023-01-15_Time-21-49-54.log
## If there is an issue, please report to github with logFile!
## Using weights on disk
## Using weights on disk
## 1 2 3 4 5 6
p2 <- lapply(seq_along(p), function(x){
if(x != 1){
p[[x]] + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6) +
theme(plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm")) +
theme(
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title.y=element_blank()
) + ylab("")
}else{
p[[x]] + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6) +
theme(plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm")) +
theme(
axis.ticks.y=element_blank(),
axis.title.y=element_blank()
) + ylab("")
}
})
do.call(cowplot::plot_grid, c(list(nrow = 1, rel_widths = c(2, rep(1, length(p2) - 1))),p2))
## Picking joint bandwidth of 0.0196
## Picking joint bandwidth of 0.0243
## Picking joint bandwidth of 0.068
## Picking joint bandwidth of 0.114
## Picking joint bandwidth of 0.0911
## Picking joint bandwidth of 0.0702
### Plot the motif enrichment z-scores on UMAP
p <- plotEmbedding(
ArchRProj = projHeme5,
colorBy = "MotifMatrix",
name = sort(markerMotifs),
embedding = "UMAP",
imputeWeights = getImputeWeights(projHeme5)
)
## Getting ImputeWeights
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-24603cace528-Date-2023-01-15_Time-21-49-58.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = MotifMatrix
## Getting Matrix Values...
## 2023-01-15 21:49:58 :
##
## Imputing Matrix
## Using weights on disk
## Using weights on disk
## Plotting Embedding
## 1
## Warning in file(file, if (append) "a" else "w"): cannot open file
## 'ArchRLogs/ArchR-plotEmbedding-24603cace528-Date-2023-01-15_Time-21-49-58.log':
## Interrupted system call
## 2 3 4 5 6
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-24603cace528-Date-2023-01-15_Time-21-49-58.log
p2 <- lapply(p, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
do.call(cowplot::plot_grid, c(list(ncol = 3),p2))
### Plot the previously infered gene expression scores on UMAP and
compare with motif enrichment z-scores
markerRNA <- getFeatures(projHeme5, select = paste(motifs, collapse="|"), useMatrix = "GeneScoreMatrix")
markerRNA <- markerRNA[markerRNA %ni% c("SREBF1","CEBPA-DT")]
markerRNA
## [1] "TBX21" "CEBPA" "EBF1" "IRF4" "PAX5" "GATA1"
p <- plotEmbedding(
ArchRProj = projHeme5,
colorBy = "GeneScoreMatrix",
name = sort(markerRNA),
embedding = "UMAP",
imputeWeights = getImputeWeights(projHeme5)
)
## Getting ImputeWeights
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-24604037a2a6-Date-2023-01-15_Time-21-50-08.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2023-01-15 21:50:08 :
##
## Imputing Matrix
## Using weights on disk
## Using weights on disk
## Plotting Embedding
## 1 2 3 4 5 6
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-24604037a2a6-Date-2023-01-15_Time-21-50-08.log
p2 <- lapply(p, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
do.call(cowplot::plot_grid, c(list(ncol = 3),p2))
markerRNA <- getFeatures(projHeme5, select = paste(motifs, collapse="|"), useMatrix = "GeneIntegrationMatrix")
markerRNA <- markerRNA[markerRNA %ni% c("SREBF1","CEBPA-DT")]
markerRNA
## [1] "TBX21" "CEBPA" "EBF1" "IRF4" "PAX5" "GATA1"
p <- plotEmbedding(
ArchRProj = projHeme5,
colorBy = "GeneIntegrationMatrix",
name = sort(markerRNA),
embedding = "UMAP",
continuousSet = "blueYellow",
imputeWeights = getImputeWeights(projHeme5)
)
## Getting ImputeWeights
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-24605d9281b5-Date-2023-01-15_Time-21-50-17.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneIntegrationMatrix
## Getting Matrix Values...
## 2023-01-15 21:50:18 :
##
## Imputing Matrix
## Using weights on disk
## Using weights on disk
## Plotting Embedding
## 1 2 3 4 5 6
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-24605d9281b5-Date-2023-01-15_Time-21-50-17.log
p2 <- lapply(p, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
do.call(cowplot::plot_grid, c(list(ncol = 3),p2))
## ChromVAR custom deviation ### Calculate enrichment using bulk
ATAC-seq peaks
if("ATAC" %ni% names(projHeme5@peakAnnotation)){
projHeme5 <- addArchRAnnotations(ArchRProj = projHeme5, collection = "ATAC")
}
projHeme5 <- addDeviationsMatrix(
ArchRProj = projHeme5,
peakAnnotation = "ATAC",
force = TRUE
)
## Using Previous Background Peaks!
## ArchR logging to : ArchRLogs/ArchR-addDeviationsMatrix-24601a018088-Date-2023-01-15_Time-21-50-28.log
## If there is an issue, please report to github with logFile!
## NULL
## 2023-01-15 21:50:30 : Batch Execution w/ safelapply!, 0 mins elapsed.
## ###########
## 2023-01-15 21:53:21 : Completed Computing Deviations!, 2.887 mins elapsed.
## ###########
## ArchR logging successful to : ArchRLogs/ArchR-addDeviationsMatrix-24601a018088-Date-2023-01-15_Time-21-50-28.log
plotVarDev <- getVarDeviations(projHeme5, plot = TRUE, name = "ATACMatrix")
## DataFrame with 6 rows and 6 columns
## seqnames idx name combinedVars combinedMeans rank
## <Rle> <array> <array> <numeric> <numeric> <integer>
## f22 z 22 IAtlas_T_CD8posCenMem 12.8024 -0.0918708 1
## f86 z 86 Heme_CD8 12.5015 -0.0686054 2
## f85 z 85 Heme_CD4 12.3654 -0.0506556 3
## f23 z 23 IAtlas_T_CD8posEffMem 12.2010 -0.0997087 4
## f21 z 21 IAtlas_T_CD8pos 12.1765 -0.0840077 5
## f33 z 33 IAtlas_T_Th1Precursor 12.1520 -0.0830697 6
plotVarDev
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ATACPeaks <- c("Heme_HSC", "Heme_LMPP", "Heme_Ery", "Heme_Mono", "Heme_CD4", "Heme_CD8", "Heme_B", "Heme_NK", "IAtlas_DC_Plasmacytoid")
markerATAC <- getFeatures(projHeme5, select = paste(ATACPeaks, collapse="|"), useMatrix = "ATACMatrix")
markerATAC <- sort(grep("z:", markerATAC, value = TRUE))
markerATAC
## [1] "z:Heme_B" "z:Heme_CD4"
## [3] "z:Heme_CD8" "z:Heme_Ery"
## [5] "z:Heme_HSC" "z:Heme_LMPP"
## [7] "z:Heme_Mono" "z:Heme_NK"
## [9] "z:IAtlas_DC_Plasmacytoid"
p <- plotEmbedding(
ArchRProj = projHeme5,
colorBy = "ATACMatrix",
name = markerATAC,
embedding = "UMAP",
imputeWeights = getImputeWeights(projHeme5)
)
## Getting ImputeWeights
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-2460125aed26-Date-2023-01-15_Time-21-53-24.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = ATACMatrix
## Getting Matrix Values...
## 2023-01-15 21:53:24 :
##
## Imputing Matrix
## Using weights on disk
## Using weights on disk
## Plotting Embedding
## 1 2 3 4 5 6 7 8 9
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-2460125aed26-Date-2023-01-15_Time-21-53-24.log
p2 <- lapply(p, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
do.call(cowplot::plot_grid, c(list(ncol = 3),p2))
# Motif footprinting
#Get the "Motif" (CIS-BP motifs) from peakAnnotation slot
motifPositions <- getPositions(projHeme5,name="Motif")
motifPositions
## GRangesList object of length 870:
## $TFAP2B_1
## GRanges object with 16724 ranges and 1 metadata column:
## seqnames ranges strand | score
## <Rle> <IRanges> <Rle> | <numeric>
## [1] chr1 873916-873927 + | 8.30742
## [2] chr1 873916-873927 - | 8.30742
## [3] chr1 875505-875516 + | 10.20625
## [4] chr1 875505-875516 - | 9.04384
## [5] chr1 896671-896682 + | 9.94830
## ... ... ... ... . ...
## [16720] chrX 154032970-154032981 + | 8.57909
## [16721] chrX 154299568-154299579 + | 8.88679
## [16722] chrX 154664929-154664940 - | 8.15284
## [16723] chrX 154807684-154807695 + | 9.56155
## [16724] chrX 154807684-154807695 - | 10.59781
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
##
## ...
## <869 more elements>
motifs <- c("GATA1", "CEBPA", "EBF1", "IRF4", "TBX21", "PAX5")
markerMotifs <- unlist(lapply(motifs, function(x) grep(x, names(motifPositions), value = TRUE)))
markerMotifs <- markerMotifs[markerMotifs %ni% "SREBF1_22"]
markerMotifs
## [1] "GATA1_383" "CEBPA_155" "EBF1_67" "IRF4_632" "TBX21_780" "PAX5_709"
seFoot <- getFootprints(
ArchRProj = projHeme5,
positions = motifPositions[markerMotifs],
groupBy = "Clusters2"
)
## ArchR logging to : ArchRLogs/ArchR-getFootprints-24607fe48132-Date-2023-01-15_Time-21-53-34.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:53:34 : Computing Kmer Bias Table, 0.003 mins elapsed.
## 2023-01-15 21:53:42 : Finished Computing Kmer Tables, 0.123 mins elapsed.
## 2023-01-15 21:53:42 : Computing Footprints, 0.126 mins elapsed.
## 2023-01-15 21:53:57 : Computing Footprints Bias, 0.383 mins elapsed.
## 2023-01-15 21:54:10 : Summarizing Footprints, 0.6 mins elapsed.
plotFootprints(
seFoot = seFoot,
ArchRProj = projHeme5,
normMethod = "None",
plotName = "Footprints-No-Normalization",
addDOC = FALSE,
smoothWindow = 5
)
## ArchR logging to : ArchRLogs/ArchR-plotFootprints-24605c019124-Date-2023-01-15_Time-21-54-12.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:54:12 : Plotting Footprint : GATA1_383 (1 of 6), 0.001 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = None
## 2023-01-15 21:54:13 : Plotting Footprint : CEBPA_155 (2 of 6), 0.023 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = None
## 2023-01-15 21:54:14 : Plotting Footprint : EBF1_67 (3 of 6), 0.045 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = None
## 2023-01-15 21:54:16 : Plotting Footprint : IRF4_632 (4 of 6), 0.068 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = None
## 2023-01-15 21:54:17 : Plotting Footprint : TBX21_780 (5 of 6), 0.089 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = None
## 2023-01-15 21:54:18 : Plotting Footprint : PAX5_709 (6 of 6), 0.112 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = None
## ArchR logging successful to : ArchRLogs/ArchR-plotFootprints-24605c019124-Date-2023-01-15_Time-21-54-12.log
plotFootprints(
seFoot = seFoot,
ArchRProj = projHeme5,
normMethod = "Subtract",
plotName = "Footprints-Subtract-Bias",
addDOC = FALSE,
smoothWindow = 5
)
## ArchR logging to : ArchRLogs/ArchR-plotFootprints-2460351f539-Date-2023-01-15_Time-21-54-20.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:54:20 : Plotting Footprint : GATA1_383 (1 of 6), 0.001 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Subtract
## 2023-01-15 21:54:21 : Plotting Footprint : CEBPA_155 (2 of 6), 0.023 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Subtract
## 2023-01-15 21:54:22 : Plotting Footprint : EBF1_67 (3 of 6), 0.045 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Subtract
## 2023-01-15 21:54:24 : Plotting Footprint : IRF4_632 (4 of 6), 0.07 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Subtract
## 2023-01-15 21:54:25 : Plotting Footprint : TBX21_780 (5 of 6), 0.093 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Subtract
## 2023-01-15 21:54:27 : Plotting Footprint : PAX5_709 (6 of 6), 0.116 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Subtract
## ArchR logging successful to : ArchRLogs/ArchR-plotFootprints-2460351f539-Date-2023-01-15_Time-21-54-20.log
plotFootprints(
seFoot = seFoot,
ArchRProj = projHeme5,
normMethod = "Divide",
plotName = "Footprints-Divide-Bias",
addDOC = FALSE,
smoothWindow = 5
)
## ArchR logging to : ArchRLogs/ArchR-plotFootprints-2460312e5133-Date-2023-01-15_Time-21-54-28.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:54:28 : Plotting Footprint : GATA1_383 (1 of 6), 0.002 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Divide
## 2023-01-15 21:54:29 : Plotting Footprint : CEBPA_155 (2 of 6), 0.024 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Divide
## 2023-01-15 21:54:31 : Plotting Footprint : EBF1_67 (3 of 6), 0.047 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Divide
## 2023-01-15 21:54:32 : Plotting Footprint : IRF4_632 (4 of 6), 0.069 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Divide
## 2023-01-15 21:54:34 : Plotting Footprint : TBX21_780 (5 of 6), 0.093 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Divide
## 2023-01-15 21:54:35 : Plotting Footprint : PAX5_709 (6 of 6), 0.115 mins elapsed.
## Applying smoothing window to footprint
## Normalizing by flanking regions
## NormMethod = Divide
## ArchR logging successful to : ArchRLogs/ArchR-plotFootprints-2460312e5133-Date-2023-01-15_Time-21-54-28.log
seTSS <- getFootprints(
ArchRProj = projHeme5,
positions = GRangesList(TSS = getTSS(projHeme5)),
groupBy = "Clusters2",
flank = 2000
)
## ArchR logging to : ArchRLogs/ArchR-getFootprints-246052d17adc-Date-2023-01-15_Time-21-54-36.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:54:36 : Computing Kmer Bias Table, 0.002 mins elapsed.
## 2023-01-15 21:54:54 : Finished Computing Kmer Tables, 0.286 mins elapsed.
## 2023-01-15 21:54:54 : Computing Footprints, 0.288 mins elapsed.
## 2023-01-15 21:55:10 : Computing Footprints Bias, 0.561 mins elapsed.
## 2023-01-15 21:55:25 : Summarizing Footprints, 0.808 mins elapsed.
plotFootprints(
seFoot = seTSS,
ArchRProj = projHeme5,
normMethod = "None",
plotName = "TSS-No-Normalization",
addDOC = FALSE,
flank = 2000,
flankNorm = 100
)
## ArchR logging to : ArchRLogs/ArchR-plotFootprints-246036d92bfe-Date-2023-01-15_Time-21-55-26.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:55:26 : Plotting Footprint : TSS (1 of 1), 0.001 mins elapsed.
## Normalizing by flanking regions
## NormMethod = None
## ArchR logging successful to : ArchRLogs/ArchR-plotFootprints-246036d92bfe-Date-2023-01-15_Time-21-55-26.log
projHeme5 <- addCoAccessibility(
ArchRProj = projHeme5,
reducedDims = "IterativeLSI"
)
## ArchR logging to : ArchRLogs/ArchR-addCoAccessibility-246043a887da-Date-2023-01-15_Time-21-55-35.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:55:35 : Computing KNN, 0.002 mins elapsed.
## 2023-01-15 21:55:35 : Identifying Non-Overlapping KNN pairs, 0.004 mins elapsed.
## 2023-01-15 21:55:38 : Identified 494 Groupings!, 0.065 mins elapsed.
## 2023-01-15 21:55:40 : Computing Co-Accessibility chr1 (1 of 23), 0.09 mins elapsed.
## 2023-01-15 21:55:45 : Computing Co-Accessibility chr2 (2 of 23), 0.171 mins elapsed.
## 2023-01-15 21:55:48 : Computing Co-Accessibility chr3 (3 of 23), 0.232 mins elapsed.
## 2023-01-15 21:55:52 : Computing Co-Accessibility chr4 (4 of 23), 0.286 mins elapsed.
## 2023-01-15 21:55:55 : Computing Co-Accessibility chr5 (5 of 23), 0.334 mins elapsed.
## 2023-01-15 21:55:58 : Computing Co-Accessibility chr6 (6 of 23), 0.384 mins elapsed.
## 2023-01-15 21:56:01 : Computing Co-Accessibility chr7 (7 of 23), 0.438 mins elapsed.
## 2023-01-15 21:56:04 : Computing Co-Accessibility chr8 (8 of 23), 0.489 mins elapsed.
## 2023-01-15 21:56:07 : Computing Co-Accessibility chr9 (9 of 23), 0.537 mins elapsed.
## 2023-01-15 21:56:10 : Computing Co-Accessibility chr10 (10 of 23), 0.586 mins elapsed.
## 2023-01-15 21:56:13 : Computing Co-Accessibility chr11 (11 of 23), 0.636 mins elapsed.
## 2023-01-15 21:56:16 : Computing Co-Accessibility chr12 (12 of 23), 0.69 mins elapsed.
## 2023-01-15 21:56:19 : Computing Co-Accessibility chr13 (13 of 23), 0.743 mins elapsed.
## 2023-01-15 21:56:22 : Computing Co-Accessibility chr14 (14 of 23), 0.785 mins elapsed.
## 2023-01-15 21:56:24 : Computing Co-Accessibility chr15 (15 of 23), 0.831 mins elapsed.
## 2023-01-15 21:56:27 : Computing Co-Accessibility chr16 (16 of 23), 0.877 mins elapsed.
## 2023-01-15 21:56:30 : Computing Co-Accessibility chr17 (17 of 23), 0.926 mins elapsed.
## 2023-01-15 21:56:33 : Computing Co-Accessibility chr18 (18 of 23), 0.981 mins elapsed.
## 2023-01-15 21:56:36 : Computing Co-Accessibility chr19 (19 of 23), 1.022 mins elapsed.
## 2023-01-15 21:56:39 : Computing Co-Accessibility chr20 (20 of 23), 1.078 mins elapsed.
## 2023-01-15 21:56:42 : Computing Co-Accessibility chr21 (21 of 23), 1.123 mins elapsed.
## 2023-01-15 21:56:44 : Computing Co-Accessibility chr22 (22 of 23), 1.162 mins elapsed.
## 2023-01-15 21:56:47 : Computing Co-Accessibility chrX (23 of 23), 1.205 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addCoAccessibility-246043a887da-Date-2023-01-15_Time-21-55-35.log
cA <- getCoAccessibility(
ArchRProj = projHeme5,
corCutOff = 0.5,
resolution = 1,
returnLoops = FALSE
)
cA
## DataFrame with 95944 rows and 11 columns
## queryHits subjectHits seqnames correlation Variability1 Variability2
## <integer> <integer> <Rle> <numeric> <numeric> <numeric>
## 1 5 7 chr1 0.729628 0.00559909 0.02913767
## 2 5 33 chr1 0.504182 0.00559909 0.01104183
## 3 6 7 chr1 0.511142 0.00409733 0.02913767
## 4 7 5 chr1 0.729628 0.02913767 0.00559909
## 5 7 6 chr1 0.511142 0.02913767 0.00409733
## ... ... ... ... ... ... ...
## 95940 143154 143146 chrX 0.532194 0.00390673 0.02920153
## 95941 143159 143177 chrX 0.513681 0.00870711 0.01454613
## 95942 143177 143159 chrX 0.513681 0.01454613 0.00870711
## 95943 143198 143199 chrX 0.532867 0.00947622 0.00250734
## 95944 143199 143198 chrX 0.532867 0.00250734 0.00947622
## TStat Pval FDR VarQuantile1 VarQuantile2
## <numeric> <numeric> <numeric> <numeric> <numeric>
## 1 23.6661 3.09624e-83 1.27170e-80 0.570949 0.918611
## 2 12.9497 3.27901e-33 1.18820e-31 0.570949 0.744449
## 3 13.1911 3.11045e-34 1.19563e-32 0.480444 0.918611
## 4 23.6661 3.09624e-83 1.27170e-80 0.918611 0.570949
## 5 13.1911 3.11045e-34 1.19563e-32 0.918611 0.480444
## ... ... ... ... ... ...
## 95940 13.9432 1.78166e-37 8.28743e-36 0.465737 0.918982
## 95941 13.2800 1.29936e-34 5.11388e-33 0.689446 0.799879
## 95942 13.2800 1.29936e-34 5.11388e-33 0.799879 0.689446
## 95943 13.9678 1.39133e-37 6.50832e-36 0.711361 0.329707
## 95944 13.9678 1.39133e-37 6.50832e-36 0.329707 0.711361
metadata(cA)[[1]]
## GRanges object with 143229 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## Mono chr1 752472-752972 *
## B chr1 762688-763188 *
## B chr1 801007-801507 *
## B chr1 805039-805539 *
## CLP chr1 845326-845826 *
## ... ... ... ...
## CD4.N chrX 154841576-154842076 *
## NK chrX 154842394-154842894 *
## Erythroid chrX 154912373-154912873 *
## PreB chrX 154996185-154996685 *
## GMP chrX 154996992-154997492 *
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
metadata(cA)[[1]][5]
## GRanges object with 1 range and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## CLP chr1 845326-845826 *
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
metadata(cA)[[1]][8]
## GRanges object with 1 range and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## CD4.M chr1 858370-858870 *
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
markerGenes <- c(
"CD34", #Early Progenitor
"GATA1", #Erythroid
"PAX5", "MS4A1", #B-Cell Trajectory
"CD14", #Monocytes
"CD3D", "CD8A", "TBX21", "IL7R" #TCells
)
p <- plotBrowserTrack(
ArchRProj = projHeme5,
groupBy = "Clusters2",
geneSymbol = markerGenes,
upstream = 50000,
downstream = 50000,
loops = getCoAccessibility(projHeme5)
)
## ArchR logging to : ArchRLogs/ArchR-plotBrowserTrack-246057fe6264-Date-2023-01-15_Time-21-56-54.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:56:54 : Validating Region, 0.002 mins elapsed.
## GRanges object with 9 ranges and 2 metadata columns:
## seqnames ranges strand | gene_id symbol
## <Rle> <IRanges> <Rle> | <character> <character>
## [1] chr1 208059883-208084683 - | 947 CD34
## [2] chrX 48644982-48652717 + | 2623 GATA1
## [3] chr9 36838531-37034476 - | 5079 PAX5
## [4] chr11 60223282-60238225 + | 931 MS4A1
## [5] chr5 140011313-140013286 - | 929 CD14
## [6] chr11 118209789-118213459 - | 915 CD3D
## [7] chr2 87011728-87035519 - | 925 CD8A
## [8] chr17 45810610-45823485 + | 30009 TBX21
## [9] chr5 35856977-35879705 + | 3575 IL7R
## -------
## seqinfo: 24 sequences from hg19 genome
## 2023-01-15 21:56:54 : Adding Bulk Tracks (1 of 9), 0.002 mins elapsed.
## 2023-01-15 21:56:55 : Adding Feature Tracks (1 of 9), 0.016 mins elapsed.
## 2023-01-15 21:56:55 : Adding Loop Tracks (1 of 9), 0.017 mins elapsed.
## 2023-01-15 21:56:55 : Adding Gene Tracks (1 of 9), 0.019 mins elapsed.
## 2023-01-15 21:56:55 : Plotting, 0.021 mins elapsed.
## 2023-01-15 21:56:56 : Adding Bulk Tracks (2 of 9), 0.036 mins elapsed.
## 2023-01-15 21:56:57 : Adding Feature Tracks (2 of 9), 0.048 mins elapsed.
## 2023-01-15 21:56:57 : Adding Loop Tracks (2 of 9), 0.049 mins elapsed.
## 2023-01-15 21:56:57 : Adding Gene Tracks (2 of 9), 0.052 mins elapsed.
## 2023-01-15 21:56:57 : Plotting, 0.055 mins elapsed.
## 2023-01-15 21:56:58 : Adding Bulk Tracks (3 of 9), 0.071 mins elapsed.
## 2023-01-15 21:56:59 : Adding Feature Tracks (3 of 9), 0.083 mins elapsed.
## 2023-01-15 21:56:59 : Adding Loop Tracks (3 of 9), 0.084 mins elapsed.
## 2023-01-15 21:56:59 : Adding Gene Tracks (3 of 9), 0.091 mins elapsed.
## 2023-01-15 21:56:59 : Plotting, 0.094 mins elapsed.
## 2023-01-15 21:57:00 : Adding Bulk Tracks (4 of 9), 0.11 mins elapsed.
## 2023-01-15 21:57:01 : Adding Feature Tracks (4 of 9), 0.121 mins elapsed.
## 2023-01-15 21:57:01 : Adding Loop Tracks (4 of 9), 0.122 mins elapsed.
## 2023-01-15 21:57:01 : Adding Gene Tracks (4 of 9), 0.127 mins elapsed.
## 2023-01-15 21:57:02 : Plotting, 0.13 mins elapsed.
## 2023-01-15 21:57:02 : Adding Bulk Tracks (5 of 9), 0.142 mins elapsed.
## 2023-01-15 21:57:03 : Adding Feature Tracks (5 of 9), 0.155 mins elapsed.
## 2023-01-15 21:57:03 : Adding Loop Tracks (5 of 9), 0.156 mins elapsed.
## 2023-01-15 21:57:03 : Adding Gene Tracks (5 of 9), 0.161 mins elapsed.
## 2023-01-15 21:57:04 : Plotting, 0.164 mins elapsed.
## 2023-01-15 21:57:05 : Adding Bulk Tracks (6 of 9), 0.18 mins elapsed.
## 2023-01-15 21:57:05 : Adding Feature Tracks (6 of 9), 0.193 mins elapsed.
## 2023-01-15 21:57:05 : Adding Loop Tracks (6 of 9), 0.194 mins elapsed.
## 2023-01-15 21:57:06 : Adding Gene Tracks (6 of 9), 0.2 mins elapsed.
## 2023-01-15 21:57:06 : Plotting, 0.204 mins elapsed.
## 2023-01-15 21:57:07 : Adding Bulk Tracks (7 of 9), 0.218 mins elapsed.
## 2023-01-15 21:57:08 : Adding Feature Tracks (7 of 9), 0.231 mins elapsed.
## 2023-01-15 21:57:08 : Adding Loop Tracks (7 of 9), 0.232 mins elapsed.
## 2023-01-15 21:57:08 : Adding Gene Tracks (7 of 9), 0.243 mins elapsed.
## 2023-01-15 21:57:09 : Plotting, 0.246 mins elapsed.
## 2023-01-15 21:57:09 : Adding Bulk Tracks (8 of 9), 0.26 mins elapsed.
## 2023-01-15 21:57:10 : Adding Feature Tracks (8 of 9), 0.273 mins elapsed.
## 2023-01-15 21:57:10 : Adding Loop Tracks (8 of 9), 0.273 mins elapsed.
## 2023-01-15 21:57:11 : Adding Gene Tracks (8 of 9), 0.282 mins elapsed.
## 2023-01-15 21:57:11 : Plotting, 0.286 mins elapsed.
## 2023-01-15 21:57:12 : Adding Bulk Tracks (9 of 9), 0.301 mins elapsed.
## 2023-01-15 21:57:13 : Adding Feature Tracks (9 of 9), 0.313 mins elapsed.
## 2023-01-15 21:57:13 : Adding Loop Tracks (9 of 9), 0.314 mins elapsed.
## 2023-01-15 21:57:13 : Adding Gene Tracks (9 of 9), 0.321 mins elapsed.
## 2023-01-15 21:57:13 : Plotting, 0.324 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotBrowserTrack-246057fe6264-Date-2023-01-15_Time-21-56-54.log
grid::grid.newpage()
grid::grid.draw(p$CD14)
projHeme5 <- addPeak2GeneLinks(
ArchRProj = projHeme5,
reducedDims = "IterativeLSI"
)
## ArchR logging to : ArchRLogs/ArchR-addPeak2GeneLinks-246018b5a513-Date-2023-01-15_Time-21-57-15.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:57:15 : Getting Available Matrices, 0.002 mins elapsed.
## 2023-01-15 21:57:15 : Filtered Low Prediction Score Cells (1250 of 10250, 0.122), 0.003 mins elapsed.
## 2023-01-15 21:57:15 : Computing KNN, 0.007 mins elapsed.
## 2023-01-15 21:57:15 : Identifying Non-Overlapping KNN pairs, 0.009 mins elapsed.
## 2023-01-15 21:57:19 : Identified 494 Groupings!, 0.073 mins elapsed.
## 2023-01-15 21:57:19 : Getting Group RNA Matrix, 0.074 mins elapsed.
## 2023-01-15 21:58:01 : Getting Group ATAC Matrix, 0.77 mins elapsed.
## 2023-01-15 21:58:42 : Normalizing Group Matrices, 1.45 mins elapsed.
## 2023-01-15 21:58:46 : Finding Peak Gene Pairings, 1.516 mins elapsed.
## 2023-01-15 21:58:46 : Computing Correlations, 1.522 mins elapsed.
## 2023-01-15 21:58:52 : Completed Peak2Gene Correlations!, 1.619 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addPeak2GeneLinks-246018b5a513-Date-2023-01-15_Time-21-57-15.log
p2g <- getPeak2GeneLinks(
ArchRProj = projHeme5,
corCutOff = 0.45,
resolution = 1,
returnLoops = FALSE
)
p2g
## DataFrame with 39146 rows and 6 columns
## idxATAC idxRNA Correlation FDR VarQATAC VarQRNA
## <integer> <integer> <numeric> <numeric> <numeric> <numeric>
## 1 46 4 0.518798 5.37027e-34 0.708809 0.266115
## 2 44 5 0.617452 1.72141e-51 0.949026 0.763615
## 3 51 5 0.451046 5.69674e-25 0.912092 0.763615
## 4 65 5 0.481414 9.17375e-29 0.667826 0.763615
## 5 79 5 0.566011 1.26131e-41 0.582927 0.763615
## ... ... ... ... ... ... ...
## 39142 143179 18590 0.508216 1.89481e-32 0.494083 0.962798
## 39143 143180 18590 0.574334 4.20739e-43 0.686327 0.962798
## 39144 143198 18590 0.510057 1.02892e-32 0.740555 0.962798
## 39145 143198 18591 0.481032 1.02954e-28 0.740555 0.953981
## 39146 143217 18595 0.591408 2.85330e-46 0.716524 0.447718
metadata(p2g)[[1]]
## GRanges object with 143229 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 752472-752972 *
## [2] chr1 762688-763188 *
## [3] chr1 801007-801507 *
## [4] chr1 805039-805539 *
## [5] chr1 845326-845826 *
## ... ... ... ...
## [143225] chrX 154841576-154842076 *
## [143226] chrX 154842394-154842894 *
## [143227] chrX 154912373-154912873 *
## [143228] chrX 154996185-154996685 *
## [143229] chrX 154996992-154997492 *
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
metadata(p2g)
## $peakSet
## GRanges object with 143229 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 752472-752972 *
## [2] chr1 762688-763188 *
## [3] chr1 801007-801507 *
## [4] chr1 805039-805539 *
## [5] chr1 845326-845826 *
## ... ... ... ...
## [143225] chrX 154841576-154842076 *
## [143226] chrX 154842394-154842894 *
## [143227] chrX 154912373-154912873 *
## [143228] chrX 154996185-154996685 *
## [143229] chrX 154996992-154997492 *
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
##
## $geneSet
## GRanges object with 18601 ranges and 2 metadata columns:
## seqnames ranges strand | name idx
## <Rle> <IRanges> <Rle> | <array> <array>
## [1] chr1 69091 * | OR4F5 1
## [2] chr1 762902 * | LINC00115 2
## [3] chr1 812182 * | FAM41C 3
## [4] chr1 860530 * | SAMD11 4
## [5] chr1 894679 * | NOC2L 5
## ... ... ... ... . ... ...
## [18597] chrX 154299695 * | BRCC3 708
## [18598] chrX 154444701 * | VBP1 709
## [18599] chrX 154493852 * | RAB39B 710
## [18600] chrX 154563986 * | CLIC2 711
## [18601] chrX 154842622 * | TMLHE 712
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
##
## $seATAC
## [1] "/Users/nzhang/Desktop/ArchR/HemeOutput/Peak2GeneLinks/seATAC-Group-KNN.rds"
##
## $seRNA
## [1] "/Users/nzhang/Desktop/ArchR/HemeOutput/Peak2GeneLinks/seRNA-Group-KNN.rds"
p <- plotBrowserTrack(
ArchRProj = projHeme5,
groupBy = "Clusters2",
geneSymbol = markerGenes,
upstream = 50000,
downstream = 50000,
loops = getPeak2GeneLinks(projHeme5)
)
## ArchR logging to : ArchRLogs/ArchR-plotBrowserTrack-2460745633b3-Date-2023-01-15_Time-21-58-52.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:58:52 : Validating Region, 0.002 mins elapsed.
## GRanges object with 9 ranges and 2 metadata columns:
## seqnames ranges strand | gene_id symbol
## <Rle> <IRanges> <Rle> | <character> <character>
## [1] chr1 208059883-208084683 - | 947 CD34
## [2] chrX 48644982-48652717 + | 2623 GATA1
## [3] chr9 36838531-37034476 - | 5079 PAX5
## [4] chr11 60223282-60238225 + | 931 MS4A1
## [5] chr5 140011313-140013286 - | 929 CD14
## [6] chr11 118209789-118213459 - | 915 CD3D
## [7] chr2 87011728-87035519 - | 925 CD8A
## [8] chr17 45810610-45823485 + | 30009 TBX21
## [9] chr5 35856977-35879705 + | 3575 IL7R
## -------
## seqinfo: 24 sequences from hg19 genome
## 2023-01-15 21:58:52 : Adding Bulk Tracks (1 of 9), 0.002 mins elapsed.
## 2023-01-15 21:58:53 : Adding Feature Tracks (1 of 9), 0.016 mins elapsed.
## 2023-01-15 21:58:53 : Adding Loop Tracks (1 of 9), 0.017 mins elapsed.
## 2023-01-15 21:58:53 : Adding Gene Tracks (1 of 9), 0.019 mins elapsed.
## 2023-01-15 21:58:54 : Plotting, 0.022 mins elapsed.
## 2023-01-15 21:58:55 : Adding Bulk Tracks (2 of 9), 0.041 mins elapsed.
## 2023-01-15 21:58:55 : Adding Feature Tracks (2 of 9), 0.052 mins elapsed.
## 2023-01-15 21:58:55 : Adding Loop Tracks (2 of 9), 0.053 mins elapsed.
## 2023-01-15 21:58:56 : Adding Gene Tracks (2 of 9), 0.055 mins elapsed.
## 2023-01-15 21:58:56 : Plotting, 0.058 mins elapsed.
## 2023-01-15 21:58:57 : Adding Bulk Tracks (3 of 9), 0.072 mins elapsed.
## 2023-01-15 21:58:57 : Adding Feature Tracks (3 of 9), 0.084 mins elapsed.
## 2023-01-15 21:58:57 : Adding Loop Tracks (3 of 9), 0.085 mins elapsed.
## 2023-01-15 21:58:58 : Adding Gene Tracks (3 of 9), 0.088 mins elapsed.
## 2023-01-15 21:58:58 : Plotting, 0.091 mins elapsed.
## 2023-01-15 21:58:59 : Adding Bulk Tracks (4 of 9), 0.107 mins elapsed.
## 2023-01-15 21:58:59 : Adding Feature Tracks (4 of 9), 0.119 mins elapsed.
## 2023-01-15 21:58:59 : Adding Loop Tracks (4 of 9), 0.12 mins elapsed.
## 2023-01-15 21:59:00 : Adding Gene Tracks (4 of 9), 0.122 mins elapsed.
## 2023-01-15 21:59:00 : Plotting, 0.124 mins elapsed.
## 2023-01-15 21:59:00 : Adding Bulk Tracks (5 of 9), 0.137 mins elapsed.
## 2023-01-15 21:59:01 : Adding Feature Tracks (5 of 9), 0.149 mins elapsed.
## 2023-01-15 21:59:01 : Adding Loop Tracks (5 of 9), 0.15 mins elapsed.
## 2023-01-15 21:59:01 : Adding Gene Tracks (5 of 9), 0.154 mins elapsed.
## 2023-01-15 21:59:02 : Plotting, 0.157 mins elapsed.
## 2023-01-15 21:59:03 : Adding Bulk Tracks (6 of 9), 0.172 mins elapsed.
## 2023-01-15 21:59:03 : Adding Feature Tracks (6 of 9), 0.184 mins elapsed.
## 2023-01-15 21:59:03 : Adding Loop Tracks (6 of 9), 0.185 mins elapsed.
## 2023-01-15 21:59:04 : Adding Gene Tracks (6 of 9), 0.192 mins elapsed.
## 2023-01-15 21:59:04 : Plotting, 0.195 mins elapsed.
## 2023-01-15 21:59:05 : Adding Bulk Tracks (7 of 9), 0.209 mins elapsed.
## 2023-01-15 21:59:06 : Adding Feature Tracks (7 of 9), 0.223 mins elapsed.
## 2023-01-15 21:59:06 : Adding Loop Tracks (7 of 9), 0.224 mins elapsed.
## 2023-01-15 21:59:06 : Adding Gene Tracks (7 of 9), 0.227 mins elapsed.
## 2023-01-15 21:59:06 : Plotting, 0.23 mins elapsed.
## 2023-01-15 21:59:07 : Adding Bulk Tracks (8 of 9), 0.245 mins elapsed.
## 2023-01-15 21:59:08 : Adding Feature Tracks (8 of 9), 0.256 mins elapsed.
## 2023-01-15 21:59:08 : Adding Loop Tracks (8 of 9), 0.257 mins elapsed.
## 2023-01-15 21:59:08 : Adding Gene Tracks (8 of 9), 0.261 mins elapsed.
## 2023-01-15 21:59:08 : Plotting, 0.263 mins elapsed.
## 2023-01-15 21:59:09 : Adding Bulk Tracks (9 of 9), 0.281 mins elapsed.
## 2023-01-15 21:59:10 : Adding Feature Tracks (9 of 9), 0.293 mins elapsed.
## 2023-01-15 21:59:10 : Adding Loop Tracks (9 of 9), 0.294 mins elapsed.
## 2023-01-15 21:59:10 : Adding Gene Tracks (9 of 9), 0.297 mins elapsed.
## 2023-01-15 21:59:10 : Plotting, 0.3 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotBrowserTrack-2460745633b3-Date-2023-01-15_Time-21-58-52.log
grid::grid.newpage()
grid::grid.draw(p$CD14)
p <- plotPeak2GeneHeatmap(ArchRProj = projHeme5, groupBy = "Clusters2")
## ArchR logging to : ArchRLogs/ArchR-plotPeak2GeneHeatmap-246074aaadd8-Date-2023-01-15_Time-21-59-12.log
## If there is an issue, please report to github with logFile!
## 2023-01-15 21:59:16 : Determining KNN Groups!, 0.069 mins elapsed.
## 2023-01-15 21:59:18 : Ordering Peak2Gene Links!, 0.103 mins elapsed.
## Warning: did not converge in 10 iterations
## 2023-01-15 21:59:29 : Constructing ATAC Heatmap!, 0.296 mins elapsed.
## Adding Annotations..
## Preparing Main Heatmap..
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## 2023-01-15 21:59:30 : Constructing RNA Heatmap!, 0.307 mins elapsed.
## Adding Annotations..
## Preparing Main Heatmap..
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## ArchR logging successful to : ArchRLogs/ArchR-plotPeak2GeneHeatmap-246074aaadd8-Date-2023-01-15_Time-21-59-12.log
p